Re: [PATCH 6/8] drm/amdgpu: always reserve two slots for the VM

2018-10-23 Thread Zhang, Jerry(Junwei)

On 10/4/18 9:12 PM, Christian König wrote:

And drop the now superfluous extra reservations.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 ++-
  2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b8de56d1a866..ba406bd1b08f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -964,10 +964,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser 
*p)
if (r)
return r;
  
-	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);

-   if (r)
-   return r;
-
p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
  
  	if (amdgpu_vm_debug) {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 218527bb0156..1b39b0144698 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -616,7 +616,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
  {
entry->priority = 0;
	entry->tv.bo = &vm->root.base.bo->tbo;
-   entry->tv.num_shared = 1;
+   /* One for the VM updates and one for the CS job */
+   entry->tv.num_shared = 2;
entry->user_pages = NULL;
	list_add(&entry->tv.head, validated);
  }
@@ -772,10 +773,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
  
  	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
  
-	r = reservation_object_reserve_shared(bo->tbo.resv, 1);

-   if (r)
-   return r;
-


A trivial thing: this change may belong in the next patch.
This patch looks like it is dropping the resv for the root bo.

Regards,
Jerry


	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
goto error;
@@ -1839,10 +1836,6 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (r)
goto error_free;
  
-	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);

-   if (r)
-   goto error_free;
-
	r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
if (r)
goto error_free;
@@ -3023,6 +3016,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
if (r)
goto error_free_root;
  
+	r = reservation_object_reserve_shared(root->tbo.resv, 1);

+   if (r)
+   goto error_unreserve;
+
r = amdgpu_vm_clear_bo(adev, vm, root,
   adev->vm_manager.root_level,
   vm->pte_support_ats);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/8] dma-buf: remove shared fence staging in reservation object

2018-10-23 Thread Zhang, Jerry(Junwei)

Patch 3, 5 is
Acked-by: Junwei Zhang 

Others are
Reviewed-by: Junwei Zhang 

On 10/4/18 9:12 PM, Christian König wrote:

No need for that any more. Just replace the list when there isn't enough
room any more for the additional fence.

Signed-off-by: Christian König 
---
  drivers/dma-buf/reservation.c | 178 ++
  include/linux/reservation.h   |   4 -
  2 files changed, 58 insertions(+), 124 deletions(-)

diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 6c95f61a32e7..5825fc336a13 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -68,105 +68,23 @@ EXPORT_SYMBOL(reservation_seqcount_string);
   */
  int reservation_object_reserve_shared(struct reservation_object *obj)
  {
-   struct reservation_object_list *fobj, *old;
-   u32 max;
+   struct reservation_object_list *old, *new;
+   unsigned int i, j, k, max;
  
  	old = reservation_object_get_list(obj);
  
  	if (old && old->shared_max) {

-   if (old->shared_count < old->shared_max) {
-   /* perform an in-place update */
-   kfree(obj->staged);
-   obj->staged = NULL;
+   if (old->shared_count < old->shared_max)
return 0;
-   } else
+   else
max = old->shared_max * 2;
-   } else
-   max = 4;
-
-   /*
-* resize obj->staged or allocate if it doesn't exist,
-* noop if already correct size
-*/
-   fobj = krealloc(obj->staged, offsetof(typeof(*fobj), shared[max]),
-   GFP_KERNEL);
-   if (!fobj)
-   return -ENOMEM;
-
-   obj->staged = fobj;
-   fobj->shared_max = max;
-   return 0;
-}
-EXPORT_SYMBOL(reservation_object_reserve_shared);
-
-static void
-reservation_object_add_shared_inplace(struct reservation_object *obj,
- struct reservation_object_list *fobj,
- struct dma_fence *fence)
-{
-   struct dma_fence *signaled = NULL;
-   u32 i, signaled_idx;
-
-   dma_fence_get(fence);
-
-   preempt_disable();
-   write_seqcount_begin(&obj->seq);
-
-   for (i = 0; i < fobj->shared_count; ++i) {
-   struct dma_fence *old_fence;
-
-   old_fence = rcu_dereference_protected(fobj->shared[i],
-   reservation_object_held(obj));
-
-   if (old_fence->context == fence->context) {
-   /* memory barrier is added by write_seqcount_begin */
-   RCU_INIT_POINTER(fobj->shared[i], fence);
-   write_seqcount_end(>seq);
-   preempt_enable();
-
-   dma_fence_put(old_fence);
-   return;
-   }
-
-   if (!signaled && dma_fence_is_signaled(old_fence)) {
-   signaled = old_fence;
-   signaled_idx = i;
-   }
-   }
-
-   /*
-* memory barrier is added by write_seqcount_begin,
-* fobj->shared_count is protected by this lock too
-*/
-   if (signaled) {
-   RCU_INIT_POINTER(fobj->shared[signaled_idx], fence);
} else {
-   BUG_ON(fobj->shared_count >= fobj->shared_max);
-   RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
-   fobj->shared_count++;
+   max = 4;
}
  
-	write_seqcount_end(&obj->seq);

-   preempt_enable();
-
-   dma_fence_put(signaled);
-}
-
-static void
-reservation_object_add_shared_replace(struct reservation_object *obj,
- struct reservation_object_list *old,
- struct reservation_object_list *fobj,
- struct dma_fence *fence)
-{
-   unsigned i, j, k;
-
-   dma_fence_get(fence);
-
-   if (!old) {
-   RCU_INIT_POINTER(fobj->shared[0], fence);
-   fobj->shared_count = 1;
-   goto done;
-   }
+   new = kmalloc(offsetof(typeof(*new), shared[max]), GFP_KERNEL);
+   if (!new)
+   return -ENOMEM;
  
  	/*

 * no need to bump fence refcounts, rcu_read access
@@ -174,46 +92,45 @@ reservation_object_add_shared_replace(struct 
reservation_object *obj,
 * references from the old struct are carried over to
 * the new.
 */
-   for (i = 0, j = 0, k = fobj->shared_max; i < old->shared_count; ++i) {
-   struct dma_fence *check;
+   for (i = 0, j = 0, k = max; i < (old ? old->shared_count : 0); ++i) {
+   struct dma_fence *fence;
  
-		check = rcu_dereference_protected(old->shared[i],

-   reservation_object_held(obj));
-
-   if (check->context == fence->context 

RE: [PATCH 1/8] dma-buf: remove shared fence staging in reservation object

2018-10-23 Thread Huang, Ray
Christian, I will take a look at them later.

Thanks,
Ray

> -Original Message-
> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
> Sent: Tuesday, October 23, 2018 8:20 PM
> To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; linux-
> me...@vger.kernel.org; linaro-mm-...@lists.linaro.org; Huang, Ray
> ; Daenzer, Michel 
> Cc: Daniel Vetter ; Chris Wilson ;
> Zhang, Jerry 
> Subject: Re: [PATCH 1/8] dma-buf: remove shared fence staging in
> reservation object
> 
> Ping once more! Adding a few more AMD people.
> 
> Any comments on this?
> 
> Thanks,
> Christian.
> 
> Am 12.10.18 um 10:22 schrieb Christian König:
> > Ping! Adding a few people directly and more mailing lists.
> >
> > Can I get an acked-by/rb for this? It's only a cleanup and not much
> > functional change.
> >
> > Regards,
> > Christian.
> >
> > Am 04.10.2018 um 15:12 schrieb Christian König:
> >> No need for that any more. Just replace the list when there isn't
> >> enough room any more for the additional fence.
> >>
> >> Signed-off-by: Christian König 
> >> ---
> >>   drivers/dma-buf/reservation.c | 178
> >> ++
> >>   include/linux/reservation.h   |   4 -
> >>   2 files changed, 58 insertions(+), 124 deletions(-)
> >>
> >> diff --git a/drivers/dma-buf/reservation.c
> >> b/drivers/dma-buf/reservation.c index 6c95f61a32e7..5825fc336a13
> >> 100644
> >> --- a/drivers/dma-buf/reservation.c
> >> +++ b/drivers/dma-buf/reservation.c
> >> @@ -68,105 +68,23 @@ EXPORT_SYMBOL(reservation_seqcount_string);
> >>    */
> >>   int reservation_object_reserve_shared(struct reservation_object
> >> *obj)
> >>   {
> >> -    struct reservation_object_list *fobj, *old;
> >> -    u32 max;
> >> +    struct reservation_object_list *old, *new;
> >> +    unsigned int i, j, k, max;
> >>     old = reservation_object_get_list(obj);
> >>     if (old && old->shared_max) {
> >> -    if (old->shared_count < old->shared_max) {
> >> -    /* perform an in-place update */
> >> -    kfree(obj->staged);
> >> -    obj->staged = NULL;
> >> +    if (old->shared_count < old->shared_max)
> >>   return 0;
> >> -    } else
> >> +    else
> >>   max = old->shared_max * 2;
> >> -    } else
> >> -    max = 4;
> >> -
> >> -    /*
> >> - * resize obj->staged or allocate if it doesn't exist,
> >> - * noop if already correct size
> >> - */
> >> -    fobj = krealloc(obj->staged, offsetof(typeof(*fobj),
> >> shared[max]),
> >> -    GFP_KERNEL);
> >> -    if (!fobj)
> >> -    return -ENOMEM;
> >> -
> >> -    obj->staged = fobj;
> >> -    fobj->shared_max = max;
> >> -    return 0;
> >> -}
> >> -EXPORT_SYMBOL(reservation_object_reserve_shared);
> >> -
> >> -static void
> >> -reservation_object_add_shared_inplace(struct reservation_object
> >> *obj,
> >> -  struct reservation_object_list *fobj,
> >> -  struct dma_fence *fence) -{
> >> -    struct dma_fence *signaled = NULL;
> >> -    u32 i, signaled_idx;
> >> -
> >> -    dma_fence_get(fence);
> >> -
> >> -    preempt_disable();
> >> -    write_seqcount_begin(>seq);
> >> -
> >> -    for (i = 0; i < fobj->shared_count; ++i) {
> >> -    struct dma_fence *old_fence;
> >> -
> >> -    old_fence = rcu_dereference_protected(fobj->shared[i],
> >> -    reservation_object_held(obj));
> >> -
> >> -    if (old_fence->context == fence->context) {
> >> -    /* memory barrier is added by write_seqcount_begin */
> >> -    RCU_INIT_POINTER(fobj->shared[i], fence);
> >> -    write_seqcount_end(>seq);
> >> -    preempt_enable();
> >> -
> >> -    dma_fence_put(old_fence);
> >> -    return;
> >> -    }
> >> -
> >> -    if (!signaled && dma_fence_is_signaled(old_fence)) {
> >> -    signaled = old_fence;
> >> -    signaled_idx = i;
> >> -    }
> >> -    }
> >> -
> >> -    /*
> >> - * memory barrier is added by write_seqcount_begin,
> >> - * fobj->shared_count is protected by this lock too
> >> - */
> >> -    if (signaled) {
> >> -    RCU_INIT_POINTER(fobj->shared[signaled_idx], fence);
> >>   } else {
> >> -    BUG_ON(fobj->shared_count >= fobj->shared_max);
> >> - RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
> >> -    fobj->shared_count++;
> >> +    max = 4;
> >>   }
> >>   -    write_seqcount_end(>seq);
> >> -    preempt_enable();
> >> -
> >> -    dma_fence_put(signaled);
> >> -}
> >> -
> >> -static void
> >> -reservation_object_add_shared_replace(struct reservation_object
> >> *obj,
> >> -  struct reservation_object_list *old,
> >> -  struct reservation_object_list *fobj,
> >> -  struct dma_fence *fence) -{
> >> -    unsigned i, j, k;
> >> -
> >> -    dma_fence_get(fence);
> >> -
> >> -    if (!old) {
> >> -    

Re: [PATCH libdrm 2/2] amdgpu: don't track handles for non-memory allocations

2018-10-23 Thread Zhang, Jerry(Junwei)

On 10/24/18 3:07 AM, Marek Olšák wrote:

From: Marek Olšák 


commit log and sign-off here as well.
And any reason for that?

Regards,
Jerry



---
  amdgpu/amdgpu_bo.c | 15 +--
  1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index 81f8a5f7..00b9b54a 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -91,26 +91,29 @@ drm_public int amdgpu_bo_alloc(amdgpu_device_handle dev,
if (r)
goto out;
  
  	r = amdgpu_bo_create(dev, alloc_buffer->alloc_size, args.out.handle,

 buf_handle);
if (r) {
amdgpu_close_kms_handle(dev, args.out.handle);
goto out;
}
  
-	pthread_mutex_lock(&dev->bo_table_mutex);

-   r = handle_table_insert(&dev->bo_handles, (*buf_handle)->handle,
-   *buf_handle);
-   pthread_mutex_unlock(>bo_table_mutex);
-   if (r)
-   amdgpu_bo_free(*buf_handle);
+   if (alloc_buffer->preferred_heap &
+   (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+   pthread_mutex_lock(&dev->bo_table_mutex);
+   r = handle_table_insert(&dev->bo_handles, (*buf_handle)->handle,
+   *buf_handle);
+   pthread_mutex_unlock(>bo_table_mutex);
+   if (r)
+   amdgpu_bo_free(*buf_handle);
+   }
  out:
return r;
  }
  
  drm_public int amdgpu_bo_set_metadata(amdgpu_bo_handle bo,

  struct amdgpu_bo_metadata *info)
  {
struct drm_amdgpu_gem_metadata args = {};
  
  	args.handle = bo->handle;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm 1/2] amdgpu: prevent an integer wraparound of cpu_map_count

2018-10-23 Thread Zhang, Jerry(Junwei)

On 10/24/18 3:07 AM, Marek Olšák wrote:

From: Marek Olšák 


We need commit log and sign-off here.

BTW, have you encountered any issue with that?



---
  amdgpu/amdgpu_bo.c | 19 +--
  1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index c0f42e81..81f8a5f7 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -22,20 +22,21 @@
   *
   */
  
  #include 

  #include 
  #include 
  #include 
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
  
  #include "libdrm_macros.h"

  #include "xf86drm.h"
  #include "amdgpu_drm.h"
  #include "amdgpu_internal.h"
  #include "util_math.h"
  
@@ -442,21 +443,29 @@ drm_public int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu)

  {
union drm_amdgpu_gem_mmap args;
void *ptr;
int r;
  
  	pthread_mutex_lock(&bo->cpu_access_mutex);
  
  	if (bo->cpu_ptr) {

/* already mapped */
assert(bo->cpu_map_count > 0);
-   bo->cpu_map_count++;
+
+   /* If the counter has already reached INT_MAX, don't increment
+* it and assume that the buffer will be mapped indefinitely.
+* The buffer is pretty unlikely to get unmapped by the user
+* at this point.
+*/
+   if (bo->cpu_map_count != INT_MAX)
+   bo->cpu_map_count++;


If so, shall we print an error here to make it noticeable that an indefinite
mapping has come up?


Regards,
Jerry

+
*cpu = bo->cpu_ptr;
pthread_mutex_unlock(>cpu_access_mutex);
return 0;
}
  
  	assert(bo->cpu_map_count == 0);
  
  	memset(&args, 0, sizeof(args));
  
  	/* Query the buffer address (args.addr_ptr).

@@ -492,21 +501,27 @@ drm_public int amdgpu_bo_cpu_unmap(amdgpu_bo_handle bo)
  
  	pthread_mutex_lock(&bo->cpu_access_mutex);

assert(bo->cpu_map_count >= 0);
  
  	if (bo->cpu_map_count == 0) {

/* not mapped */
pthread_mutex_unlock(>cpu_access_mutex);
return -EINVAL;
}
  
-	bo->cpu_map_count--;

+   /* If the counter has already reached INT_MAX, don't decrement it.
+* This is because amdgpu_bo_cpu_map doesn't increment it past
+* INT_MAX.
+*/
+   if (bo->cpu_map_count != INT_MAX)
+   bo->cpu_map_count--;
+
if (bo->cpu_map_count > 0) {
/* mapped multiple times */
pthread_mutex_unlock(>cpu_access_mutex);
return 0;
}
  
  	r = drm_munmap(bo->cpu_ptr, bo->alloc_size) == 0 ? 0 : -errno;

bo->cpu_ptr = NULL;
pthread_mutex_unlock(>cpu_access_mutex);
return r;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: Reorganize *_flush_gpu_tlb() for kfd to use

2018-10-23 Thread Zhao, Yong
Revised accordingly, and pushed. Thanks!


Yong


From: Kuehling, Felix
Sent: Tuesday, October 23, 2018 4:37:45 PM
To: Zhao, Yong; brahma_sw_dev; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/2] drm/amdgpu: Reorganize *_flush_gpu_tlb() for kfd to use

It occurred to me that the flush_type is a hardware-specific value, but
you're using it in a hardware-abstracted interface. If the meaning of
the flush type values changes in future HW-generations, we'll need to
define an abstract enum that gets translated to the respective HW values
in the HW-specific code.

Anyway, this works for now.

One more cosmetic comment inline ...

Other than that the series is Reviewed-by: Felix Kuehling


Regards,
  Felix

On 2018-10-23 2:15 p.m., Zhao, Yong wrote:
> Add a flush_type parameter to that series of functions.
>
> Change-Id: I3dcd71955297c53b181f82e7078981230c642c01
> Signed-off-by: Yong Zhao 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c|  5 +++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c|  5 +++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c|  4 ++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 19 ++-
>  6 files changed, 22 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 11fea28..9a212aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, 
> uint64_t offset,
>}
>mb();
>amdgpu_asic_flush_hdp(adev, NULL);
> - amdgpu_gmc_flush_gpu_tlb(adev, 0);
> + amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
>return 0;
>  }
>
> @@ -331,7 +331,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t 
> offset,
>
>mb();
>amdgpu_asic_flush_hdp(adev, NULL);
> - amdgpu_gmc_flush_gpu_tlb(adev, 0);
> + amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
>return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 6fa7ef4..4c5f18c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -64,7 +64,7 @@ struct amdgpu_vmhub {
>  struct amdgpu_gmc_funcs {
>/* flush the vm tlb via mmio */
>void (*flush_gpu_tlb)(struct amdgpu_device *adev,
> -   uint32_t vmid);
> +   uint32_t vmid, uint32_t flush_type);
>/* flush the vm tlb via ring */
>uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>   uint64_t pd_addr);
> @@ -151,7 +151,7 @@ struct amdgpu_gmc {
>struct amdgpu_xgmi xgmi;
>  };
>
> -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) 
> (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
> +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) 
> (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
>  #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) 
> (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>  #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) 
> (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>  #define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) 
> (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index e1c2b4e..2821d1d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -358,7 +358,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
>return 0;
>  }
>
> -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
> +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
> + uint32_t vmid, uint32_t flush_type)
>  {
>WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>  }
> @@ -580,7 +581,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device 
> *adev)
>else
>gmc_v6_0_set_fault_enable_default(adev, true);
>
> - gmc_v6_0_flush_gpu_tlb(adev, 0);
> + gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
>dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
> (unsigned)(adev->gmc.gart_size >> 20),
> (unsigned long long)table_addr);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 910c4ce..761dcfb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -430,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>   *
>   * Flush the TLB for the requested page table (CIK).
>   */
> -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
> 

Re: [PATCH 2/2] drm/amdkfd: page_table_base already have the flags needed

2018-10-23 Thread Zhao, Yong
Pushed. Thanks!


Yong


From: Kuehling, Felix
Sent: Tuesday, October 23, 2018 4:43:05 PM
To: Zhao, Yong; amd-gfx@lists.freedesktop.org; brahma_sw_dev
Subject: Re: [PATCH 2/2] drm/amdkfd: page_table_base already have the flags 
needed

The series is Reviewed-by: Felix Kuehling 


On 2018-10-23 1:00 p.m., Zhao, Yong wrote:
>
> How about those two patches?
>
>
> Yong
>
> 
> *From:* Zhao, Yong
> *Sent:* Monday, October 22, 2018 2:33:26 PM
> *To:* amd-gfx@lists.freedesktop.org; brahma_sw_dev
> *Cc:* Zhao, Yong
> *Subject:* [PATCH 2/2] drm/amdkfd: page_table_base already have the
> flags needed
>
> The flags are added when calling amdgpu_gmc_pd_addr().
>
> Change-Id: Idd85b1ac35d3d100154df8229ea20721d9a7045c
> Signed-off-by: Yong Zhao 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 5 ++---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 54c3690..60b5f56c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -978,7 +978,6 @@ static void set_vm_context_page_table_base(struct
> kgd_dev *kgd, uint32_t vmid,
>  uint64_t page_table_base)
>  {
>  struct amdgpu_device *adev = get_amdgpu_device(kgd);
> -   uint64_t base = page_table_base | AMDGPU_PTE_VALID;
>
>  if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
>  pr_err("trying to set page table base for wrong VMID
> %u\n",
> @@ -990,7 +989,7 @@ static void set_vm_context_page_table_base(struct
> kgd_dev *kgd, uint32_t vmid,
>   * now, all processes share the same address space size, like
>   * on GFX8 and older.
>   */
> -   mmhub_v1_0_setup_vm_pt_regs(adev, vmid, base);
> +   mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
>
> -   gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, base);
> +   gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 53ff86d..dec8e64 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -507,6 +507,7 @@ struct qcm_process_device {
>   * All the memory management data should be here too
>   */
>  uint64_t gds_context_area;
> +   /* Contains page table flags such as AMDGPU_PTE_VALID since
> gfx9 */
>  uint64_t page_table_base;
>  uint32_t sh_mem_config;
>  uint32_t sh_mem_bases;
> --
> 2.7.4
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: Reorganize *_flush_gpu_tlb() for kfd to use

2018-10-23 Thread Liu, Shaoyun
I'm working on a vm fault issue that this tlb flush type can have  
effect on this , do we plan to make a kernel parameter for this ?


Regards

shaoyun.liu

On 2018-10-23 4:37 p.m., Kuehling, Felix wrote:
> It occurred to me that the flush_type is a hardware-specific value, but
> you're using it in a hardware-abstracted interface. If the meaning of
> the flush type values changes in future HW-generations, we'll need to
> define an abstract enum that gets translated to the respective HW values
> in the HW-specific code.
>
> Anyway, this works for now.
>
> One more cosmetic comment inline ...
>
> Other than that the series is Reviewed-by: Felix Kuehling
> 
>
> Regards,
>    Felix
>
> On 2018-10-23 2:15 p.m., Zhao, Yong wrote:
>> Add a flush_type parameter to that series of functions.
>>
>> Change-Id: I3dcd71955297c53b181f82e7078981230c642c01
>> Signed-off-by: Yong Zhao 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c|  5 +++--
>>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c|  5 +++--
>>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c|  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 19 ++-
>>   6 files changed, 22 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> index 11fea28..9a212aa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> @@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, 
>> uint64_t offset,
>>  }
>>  mb();
>>  amdgpu_asic_flush_hdp(adev, NULL);
>> -amdgpu_gmc_flush_gpu_tlb(adev, 0);
>> +amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
>>  return 0;
>>   }
>>   
>> @@ -331,7 +331,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, 
>> uint64_t offset,
>>   
>>  mb();
>>  amdgpu_asic_flush_hdp(adev, NULL);
>> -amdgpu_gmc_flush_gpu_tlb(adev, 0);
>> +amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
>>  return 0;
>>   }
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> index 6fa7ef4..4c5f18c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> @@ -64,7 +64,7 @@ struct amdgpu_vmhub {
>>   struct amdgpu_gmc_funcs {
>>  /* flush the vm tlb via mmio */
>>  void (*flush_gpu_tlb)(struct amdgpu_device *adev,
>> -  uint32_t vmid);
>> +  uint32_t vmid, uint32_t flush_type);
>>  /* flush the vm tlb via ring */
>>  uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>> uint64_t pd_addr);
>> @@ -151,7 +151,7 @@ struct amdgpu_gmc {
>>  struct amdgpu_xgmi xgmi;
>>   };
>>   
>> -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) 
>> (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
>> +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) 
>> (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
>>   #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) 
>> (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>>   #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) 
>> (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>>   #define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) 
>> (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> index e1c2b4e..2821d1d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> @@ -358,7 +358,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
>>  return 0;
>>   }
>>   
>> -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t 
>> vmid)
>> +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
>> +uint32_t vmid, uint32_t flush_type)
>>   {
>>  WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>>   }
>> @@ -580,7 +581,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device 
>> *adev)
>>  else
>>  gmc_v6_0_set_fault_enable_default(adev, true);
>>   
>> -gmc_v6_0_flush_gpu_tlb(adev, 0);
>> +gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
>>  dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
>>   (unsigned)(adev->gmc.gart_size >> 20),
>>   (unsigned long long)table_addr);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index 910c4ce..761dcfb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -430,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>>*
>>* Flush the TLB for the requested page table (CIK).
>>*/
>> -static void gmc_v7_0_flush_gpu_tlb(struct 

Re: [PATCH 2/2] drm/amdkfd: page_table_base already have the flags needed

2018-10-23 Thread Kuehling, Felix
The series is Reviewed-by: Felix Kuehling 


On 2018-10-23 1:00 p.m., Zhao, Yong wrote:
>
> How about those two patches?
>
>
> Yong
>
> 
> *From:* Zhao, Yong
> *Sent:* Monday, October 22, 2018 2:33:26 PM
> *To:* amd-gfx@lists.freedesktop.org; brahma_sw_dev
> *Cc:* Zhao, Yong
> *Subject:* [PATCH 2/2] drm/amdkfd: page_table_base already have the
> flags needed
>  
> The flags are added when calling amdgpu_gmc_pd_addr().
>
> Change-Id: Idd85b1ac35d3d100154df8229ea20721d9a7045c
> Signed-off-by: Yong Zhao 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 5 ++---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 54c3690..60b5f56c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -978,7 +978,6 @@ static void set_vm_context_page_table_base(struct
> kgd_dev *kgd, uint32_t vmid,
>  uint64_t page_table_base)
>  {
>  struct amdgpu_device *adev = get_amdgpu_device(kgd);
> -   uint64_t base = page_table_base | AMDGPU_PTE_VALID;
>  
>  if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
>  pr_err("trying to set page table base for wrong VMID
> %u\n",
> @@ -990,7 +989,7 @@ static void set_vm_context_page_table_base(struct
> kgd_dev *kgd, uint32_t vmid,
>   * now, all processes share the same address space size, like
>   * on GFX8 and older.
>   */
> -   mmhub_v1_0_setup_vm_pt_regs(adev, vmid, base);
> +   mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
>  
> -   gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, base);
> +   gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 53ff86d..dec8e64 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -507,6 +507,7 @@ struct qcm_process_device {
>   * All the memory management data should be here too
>   */
>  uint64_t gds_context_area;
> +   /* Contains page table flags such as AMDGPU_PTE_VALID since
> gfx9 */
>  uint64_t page_table_base;
>  uint32_t sh_mem_config;
>  uint32_t sh_mem_bases;
> -- 
> 2.7.4
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: Reorganize *_flush_gpu_tlb() for kfd to use

2018-10-23 Thread Kuehling, Felix
It occurred to me that the flush_type is a hardware-specific value, but
you're using it in a hardware-abstracted interface. If the meaning of
the flush type values changes in future HW-generations, we'll need to
define an abstract enum that gets translated to the respective HW values
in the HW-specific code.

Anyway, this works for now.

One more cosmetic comment inline ...

Other than that the series is Reviewed-by: Felix Kuehling


Regards,
  Felix

On 2018-10-23 2:15 p.m., Zhao, Yong wrote:
> Add a flush_type parameter to that series of functions.
>
> Change-Id: I3dcd71955297c53b181f82e7078981230c642c01
> Signed-off-by: Yong Zhao 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c|  5 +++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c|  5 +++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c|  4 ++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 19 ++-
>  6 files changed, 22 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 11fea28..9a212aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, 
> uint64_t offset,
>   }
>   mb();
>   amdgpu_asic_flush_hdp(adev, NULL);
> - amdgpu_gmc_flush_gpu_tlb(adev, 0);
> + amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
>   return 0;
>  }
>  
> @@ -331,7 +331,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t 
> offset,
>  
>   mb();
>   amdgpu_asic_flush_hdp(adev, NULL);
> - amdgpu_gmc_flush_gpu_tlb(adev, 0);
> + amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
>   return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 6fa7ef4..4c5f18c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -64,7 +64,7 @@ struct amdgpu_vmhub {
>  struct amdgpu_gmc_funcs {
>   /* flush the vm tlb via mmio */
>   void (*flush_gpu_tlb)(struct amdgpu_device *adev,
> -   uint32_t vmid);
> +   uint32_t vmid, uint32_t flush_type);
>   /* flush the vm tlb via ring */
>   uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>  uint64_t pd_addr);
> @@ -151,7 +151,7 @@ struct amdgpu_gmc {
>   struct amdgpu_xgmi xgmi;
>  };
>  
> -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) 
> (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
> +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) 
> (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
>  #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) 
> (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>  #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) 
> (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>  #define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) 
> (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index e1c2b4e..2821d1d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -358,7 +358,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
>   return 0;
>  }
>  
> -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
> +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
> + uint32_t vmid, uint32_t flush_type)
>  {
>   WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>  }
> @@ -580,7 +581,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device 
> *adev)
>   else
>   gmc_v6_0_set_fault_enable_default(adev, true);
>  
> - gmc_v6_0_flush_gpu_tlb(adev, 0);
> + gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
>   dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
>(unsigned)(adev->gmc.gart_size >> 20),
>(unsigned long long)table_addr);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 910c4ce..761dcfb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -430,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>   *
>   * Flush the TLB for the requested page table (CIK).
>   */
> -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
> +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
> + uint32_t vmid, uint32_t flush_type)
>  {
>   /* bits 0-15 are the VM contexts0-15 */
>   WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> @@ -698,7 +699,7 @@ static int gmc_v7_0_gart_enable(struct 

[PATCH 2/2] drm/amdkfd: Use functions from amdgpu to invalidate vmid in kfd

2018-10-23 Thread Zhao, Yong
As part of the change, we stop taking the srbm lock, and start to use
the same invalidation engine and software lock.

Change-Id: I306305e43d4b4032316909b3f4e3f9f5ca4520ae
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 37 +--
 1 file changed, 1 insertion(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 3ade5d5..f4b4706 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -48,17 +48,6 @@
 #include "soc15d.h"
 #include "gmc_v9_0.h"
 
-/* HACK: MMHUB and GC both have VM-related register with the same
- * names but different offsets. Define the MMHUB register we need here
- * with a prefix. A proper solution would be to move the functions
- * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
- * respectively.
- */
-#define mmMMHUB_VM_INVALIDATE_ENG16_REQ0x06f3
-#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX   0
-
-#define mmMMHUB_VM_INVALIDATE_ENG16_ACK0x0705
-#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX   0
 
 #define V9_PIPE_PER_MEC(4)
 #define V9_QUEUES_PER_PIPE_MEC (8)
@@ -742,15 +731,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct 
kgd_dev *kgd,
 static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
 {
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-   uint32_t req = (1 << vmid) |
-   (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
-   VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
-   VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
-   VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
-   VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
-   VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
-
-   mutex_lock(>srbm_mutex);
 
/* Use legacy mode tlb invalidation.
 *
@@ -767,22 +747,7 @@ static void write_vmid_invalidate_request(struct kgd_dev 
*kgd, uint8_t vmid)
 * TODO 2: support range-based invalidation, requires kfg2kgd
 * interface change
 */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
-
-   WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
-   req);
-
-   while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
-   (1 << vmid)))
-   cpu_relax();
-
-   while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
-   mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
-   (1 << vmid)))
-   cpu_relax();
-
-   mutex_unlock(>srbm_mutex);
-
+   amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
 }
 
 static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: Reorganize *_flush_gpu_tlb() for kfd to use

2018-10-23 Thread Zhao, Yong
Add a flush_type parameter to that series of functions.

Change-Id: I3dcd71955297c53b181f82e7078981230c642c01
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c|  5 +++--
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c|  5 +++--
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c|  4 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 19 ++-
 6 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 11fea28..9a212aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t 
offset,
}
mb();
amdgpu_asic_flush_hdp(adev, NULL);
-   amdgpu_gmc_flush_gpu_tlb(adev, 0);
+   amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
return 0;
 }
 
@@ -331,7 +331,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t 
offset,
 
mb();
amdgpu_asic_flush_hdp(adev, NULL);
-   amdgpu_gmc_flush_gpu_tlb(adev, 0);
+   amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 6fa7ef4..4c5f18c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -64,7 +64,7 @@ struct amdgpu_vmhub {
 struct amdgpu_gmc_funcs {
/* flush the vm tlb via mmio */
void (*flush_gpu_tlb)(struct amdgpu_device *adev,
- uint32_t vmid);
+ uint32_t vmid, uint32_t flush_type);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
   uint64_t pd_addr);
@@ -151,7 +151,7 @@ struct amdgpu_gmc {
struct amdgpu_xgmi xgmi;
 };
 
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) 
(adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) 
(adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) 
(r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) 
(r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
 #define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) 
(adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index e1c2b4e..2821d1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -358,7 +358,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
return 0;
 }
 
-static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
+static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
+   uint32_t vmid, uint32_t flush_type)
 {
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
@@ -580,7 +581,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
else
gmc_v6_0_set_fault_enable_default(adev, true);
 
-   gmc_v6_0_flush_gpu_tlb(adev, 0);
+   gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
 (unsigned)(adev->gmc.gart_size >> 20),
 (unsigned long long)table_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 910c4ce..761dcfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -430,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
  *
  * Flush the TLB for the requested page table (CIK).
  */
-static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
+static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
+   uint32_t vmid, uint32_t flush_type)
 {
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@@ -698,7 +699,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmCHUB_CONTROL, tmp);
}
 
-   gmc_v7_0_flush_gpu_tlb(adev, 0);
+   gmc_v7_0_flush_gpu_tlb(adev, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 (unsigned)(adev->gmc.gart_size >> 20),
 (unsigned long long)table_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 1d3265c..531aaf3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -611,7 +611,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
  * Flush the TLB for the 

Re: [PATCH 2/2] drm/amdkfd: Use functions from amdgpu to invalidate vmid in kfd

2018-10-23 Thread Deucher, Alexander
Series is:

Reviewed-by: Alex Deucher 


From: Zhao, Yong
Sent: Tuesday, October 23, 2018 2:15:59 PM
To: brahma_sw_dev; amd-gfx@lists.freedesktop.org
Cc: Zhao, Yong
Subject: [PATCH 2/2] drm/amdkfd: Use functions from amdgpu to invalidate vmid 
in kfd

As part of the change, we stop taking the srbm lock, and start to use
the same invalidation engine and software lock.

Change-Id: I306305e43d4b4032316909b3f4e3f9f5ca4520ae
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 37 +--
 1 file changed, 1 insertion(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 3ade5d5..f4b4706 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -48,17 +48,6 @@
 #include "soc15d.h"
 #include "gmc_v9_0.h"

-/* HACK: MMHUB and GC both have VM-related register with the same
- * names but different offsets. Define the MMHUB register we need here
- * with a prefix. A proper solution would be to move the functions
- * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
- * respectively.
- */
-#define mmMMHUB_VM_INVALIDATE_ENG16_REQ0x06f3
-#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX   0
-
-#define mmMMHUB_VM_INVALIDATE_ENG16_ACK0x0705
-#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX   0

 #define V9_PIPE_PER_MEC (4)
 #define V9_QUEUES_PER_PIPE_MEC  (8)
@@ -742,15 +731,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct 
kgd_dev *kgd,
 static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
 {
 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-   uint32_t req = (1 << vmid) |
-   (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
-   VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
-   VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
-   VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
-   VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
-   VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
-
-   mutex_lock(>srbm_mutex);

 /* Use legacy mode tlb invalidation.
  *
@@ -767,22 +747,7 @@ static void write_vmid_invalidate_request(struct kgd_dev 
*kgd, uint8_t vmid)
  * TODO 2: support range-based invalidation, requires kfg2kgd
  * interface change
  */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
-
-   WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
-   req);
-
-   while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
-   (1 << vmid)))
-   cpu_relax();
-
-   while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
-   mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
-   (1 << vmid)))
-   cpu_relax();
-
-   mutex_unlock(>srbm_mutex);
-
+   amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
 }

 static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH libdrm 2/2] amdgpu: don't track handles for non-memory allocations

2018-10-23 Thread Marek Olšák
From: Marek Olšák 

---
 amdgpu/amdgpu_bo.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index 81f8a5f7..00b9b54a 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -91,26 +91,29 @@ drm_public int amdgpu_bo_alloc(amdgpu_device_handle dev,
if (r)
goto out;
 
r = amdgpu_bo_create(dev, alloc_buffer->alloc_size, args.out.handle,
 buf_handle);
if (r) {
amdgpu_close_kms_handle(dev, args.out.handle);
goto out;
}
 
-   pthread_mutex_lock(>bo_table_mutex);
-   r = handle_table_insert(>bo_handles, (*buf_handle)->handle,
-   *buf_handle);
-   pthread_mutex_unlock(>bo_table_mutex);
-   if (r)
-   amdgpu_bo_free(*buf_handle);
+   if (alloc_buffer->preferred_heap &
+   (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+   pthread_mutex_lock(>bo_table_mutex);
+   r = handle_table_insert(>bo_handles, (*buf_handle)->handle,
+   *buf_handle);
+   pthread_mutex_unlock(>bo_table_mutex);
+   if (r)
+   amdgpu_bo_free(*buf_handle);
+   }
 out:
return r;
 }
 
 drm_public int amdgpu_bo_set_metadata(amdgpu_bo_handle bo,
  struct amdgpu_bo_metadata *info)
 {
struct drm_amdgpu_gem_metadata args = {};
 
args.handle = bo->handle;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH libdrm 1/2] amdgpu: prevent an integer wraparound of cpu_map_count

2018-10-23 Thread Marek Olšák
From: Marek Olšák 

---
 amdgpu/amdgpu_bo.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index c0f42e81..81f8a5f7 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -22,20 +22,21 @@
  *
  */
 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 
 #include "libdrm_macros.h"
 #include "xf86drm.h"
 #include "amdgpu_drm.h"
 #include "amdgpu_internal.h"
 #include "util_math.h"
 
@@ -442,21 +443,29 @@ drm_public int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, 
void **cpu)
 {
union drm_amdgpu_gem_mmap args;
void *ptr;
int r;
 
pthread_mutex_lock(>cpu_access_mutex);
 
if (bo->cpu_ptr) {
/* already mapped */
assert(bo->cpu_map_count > 0);
-   bo->cpu_map_count++;
+
+   /* If the counter has already reached INT_MAX, don't increment
+* it and assume that the buffer will be mapped indefinitely.
+* The buffer is pretty unlikely to get unmapped by the user
+* at this point.
+*/
+   if (bo->cpu_map_count != INT_MAX)
+   bo->cpu_map_count++;
+
*cpu = bo->cpu_ptr;
pthread_mutex_unlock(>cpu_access_mutex);
return 0;
}
 
assert(bo->cpu_map_count == 0);
 
memset(, 0, sizeof(args));
 
/* Query the buffer address (args.addr_ptr).
@@ -492,21 +501,27 @@ drm_public int amdgpu_bo_cpu_unmap(amdgpu_bo_handle bo)
 
pthread_mutex_lock(>cpu_access_mutex);
assert(bo->cpu_map_count >= 0);
 
if (bo->cpu_map_count == 0) {
/* not mapped */
pthread_mutex_unlock(>cpu_access_mutex);
return -EINVAL;
}
 
-   bo->cpu_map_count--;
+   /* If the counter has already reached INT_MAX, don't decrement it.
+* This is because amdgpu_bo_cpu_map doesn't increment it past
+* INT_MAX.
+*/
+   if (bo->cpu_map_count != INT_MAX)
+   bo->cpu_map_count--;
+
if (bo->cpu_map_count > 0) {
/* mapped multiple times */
pthread_mutex_unlock(>cpu_access_mutex);
return 0;
}
 
r = drm_munmap(bo->cpu_ptr, bo->alloc_size) == 0 ? 0 : -errno;
bo->cpu_ptr = NULL;
pthread_mutex_unlock(>cpu_access_mutex);
return r;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Add DCC flags for GFX9 amdgpu_bo

2018-10-23 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Oct 23, 2018 at 10:05 AM Nicholas Kazlauskas <
nicholas.kazlaus...@amd.com> wrote:

> [Why]
> Hardware support for Delta Color Compression (DCC) decompression is
> available in DC for GFX9 but there's no way for userspace to enable
> the feature.
>
> Enabling the feature can provide improved GFX performance and
> power savings in many situations.
>
> [How]
> Extend the GFX9 tiling flags to include DCC parameters. These are
> logically grouped together with tiling flags even if they are
> technically distinct.
>
> This trivially maintains backwards compatibility with existing
> users of amdgpu_gem_metadata. No new IOCTls or data structures are
> needed to support DCC.
>
> This patch helps expose DCC attributes to both libdrm and amdgpu_dm.
>
> Signed-off-by: Nicholas Kazlauskas 
> ---
>  include/uapi/drm/amdgpu_drm.h | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 6a0d77dcfc47..faaad04814e4 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -329,6 +329,12 @@ struct drm_amdgpu_gem_userptr {
>  /* GFX9 and later: */
>  #define AMDGPU_TILING_SWIZZLE_MODE_SHIFT   0
>  #define AMDGPU_TILING_SWIZZLE_MODE_MASK0x1f
> +#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT5
> +#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFF
> +#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT  29
> +#define AMDGPU_TILING_DCC_PITCH_MAX_MASK   0x3FFF
> +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT43
> +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1
>
>  /* Set/Get helpers for tiling flags. */
>  #define AMDGPU_TILING_SET(field, value) \
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdkfd: page_table_base already have the flags needed

2018-10-23 Thread Zhao, Yong
How about those two patches?


Yong


From: Zhao, Yong
Sent: Monday, October 22, 2018 2:33:26 PM
To: amd-gfx@lists.freedesktop.org; brahma_sw_dev
Cc: Zhao, Yong
Subject: [PATCH 2/2] drm/amdkfd: page_table_base already have the flags needed

The flags are added when calling amdgpu_gmc_pd_addr().

Change-Id: Idd85b1ac35d3d100154df8229ea20721d9a7045c
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 5 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 54c3690..60b5f56c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -978,7 +978,6 @@ static void set_vm_context_page_table_base(struct kgd_dev 
*kgd, uint32_t vmid,
 uint64_t page_table_base)
 {
 struct amdgpu_device *adev = get_amdgpu_device(kgd);
-   uint64_t base = page_table_base | AMDGPU_PTE_VALID;

 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
 pr_err("trying to set page table base for wrong VMID %u\n",
@@ -990,7 +989,7 @@ static void set_vm_context_page_table_base(struct kgd_dev 
*kgd, uint32_t vmid,
  * now, all processes share the same address space size, like
  * on GFX8 and older.
  */
-   mmhub_v1_0_setup_vm_pt_regs(adev, vmid, base);
+   mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);

-   gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, base);
+   gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 53ff86d..dec8e64 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -507,6 +507,7 @@ struct qcm_process_device {
  * All the memory management data should be here too
  */
 uint64_t gds_context_area;
+   /* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
 uint64_t page_table_base;
 uint32_t sh_mem_config;
 uint32_t sh_mem_bases;
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2] drm/amdgpu: Enable default GPU reset for dGPU on gfx8/9 v2

2018-10-23 Thread Alex Deucher
On Tue, Oct 23, 2018 at 12:51 PM Andrey Grodzovsky
 wrote:
>
> After testing looks like these subset of ASICs has GPU reset
> working for the most part. Enable reset due to job timeout.
>
> v2: Switch from GFX version to ASIC type.
>
> Signed-off-by: Andrey Grodzovsky 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 30 
> ++
>  1 file changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index d11489e..759f3fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3297,13 +3297,35 @@ bool amdgpu_device_should_recover_gpu(struct 
> amdgpu_device *adev)
> return false;
> }
>
> -   if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1  &&
> -!amdgpu_sriov_vf(adev))) {
> -   DRM_INFO("GPU recovery disabled.\n");
> -   return false;
> +   if (amdgpu_gpu_recovery == 0)
> +   goto disabled;
> +
> +   if (amdgpu_sriov_vf(adev))
> +   return true;
> +
> +   if (amdgpu_gpu_recovery == -1) {
> +   switch (adev->asic_type) {
> +   case CHIP_TOPAZ:
> +   case CHIP_TONGA:
> +   case CHIP_FIJI:
> +   case CHIP_POLARIS10:
> +   case CHIP_POLARIS11:
> +   case CHIP_POLARIS12:
> +   case CHIP_VEGAM:
> +   case CHIP_VEGA20:
> +   case CHIP_VEGA10:
> +   case CHIP_VEGA12:
> +   break;
> +   default:
> +   goto disabled;
> +   }
> }
>
> return true;
> +
> +disabled:
> +   DRM_INFO("GPU recovery disabled.\n");
> +   return false;
>  }
>
>  /**
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2] drm/amdgpu: Enable default GPU reset for dGPU on gfx8/9 v2

2018-10-23 Thread Andrey Grodzovsky
After testing looks like these subset of ASICs has GPU reset
working for the most part. Enable reset due to job timeout.

v2: Switch from GFX version to ASIC type.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d11489e..759f3fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3297,13 +3297,35 @@ bool amdgpu_device_should_recover_gpu(struct 
amdgpu_device *adev)
return false;
}
 
-   if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1  &&
-!amdgpu_sriov_vf(adev))) {
-   DRM_INFO("GPU recovery disabled.\n");
-   return false;
+   if (amdgpu_gpu_recovery == 0)
+   goto disabled;
+
+   if (amdgpu_sriov_vf(adev))
+   return true;
+
+   if (amdgpu_gpu_recovery == -1) {
+   switch (adev->asic_type) {
+   case CHIP_TOPAZ:
+   case CHIP_TONGA:
+   case CHIP_FIJI:
+   case CHIP_POLARIS10:
+   case CHIP_POLARIS11:
+   case CHIP_POLARIS12:
+   case CHIP_VEGAM:
+   case CHIP_VEGA20:
+   case CHIP_VEGA10:
+   case CHIP_VEGA12:
+   break;
+   default:
+   goto disabled;
+   }
}
 
return true;
+
+disabled:
+   DRM_INFO("GPU recovery disabled.\n");
+   return false;
 }
 
 /**
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/sched: Add boolean to mark if sched is ready to work v2

2018-10-23 Thread Grodzovsky, Andrey


On 10/22/2018 05:33 AM, Koenig, Christian wrote:
> Am 19.10.18 um 22:52 schrieb Andrey Grodzovsky:
>> Problem:
>> A particular scheduler may become unsuable (underlying HW) after
>> some event (e.g. GPU reset). If it's later chosen by
>> the get free sched. policy a command will fail to be
>> submitted.
>>
>> Fix:
>> Add a driver specific callback to report the sched status so
>> rq with bad sched can be avoided in favor of working one or
>> none in which case job init will fail.
>>
>> v2: Switch from driver callback to flag in scheduler.
>>
>> Signed-off-by: Andrey Grodzovsky 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
>>drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  2 +-
>>drivers/gpu/drm/scheduler/sched_entity.c  |  9 -
>>drivers/gpu/drm/scheduler/sched_main.c| 10 +-
>>drivers/gpu/drm/v3d/v3d_sched.c   |  4 ++--
>>include/drm/gpu_scheduler.h   |  5 -
>>6 files changed, 25 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> index 5448cf2..bf845b0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> @@ -450,7 +450,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring 
>> *ring,
>>
>>  r = drm_sched_init(>sched, _sched_ops,
>> num_hw_submission, amdgpu_job_hang_limit,
>> -   timeout, ring->name);
>> +   timeout, ring->name, false);
>>  if (r) {
>>  DRM_ERROR("Failed to create scheduler on ring %s.\n",
>>ring->name);
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
>> b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> index f8c5f1e..9dca347 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> @@ -178,7 +178,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>>
>>  ret = drm_sched_init(>sched, _sched_ops,
>>   etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
>> - msecs_to_jiffies(500), dev_name(gpu->dev));
>> + msecs_to_jiffies(500), dev_name(gpu->dev), true);
>>  if (ret)
>>  return ret;
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
>> b/drivers/gpu/drm/scheduler/sched_entity.c
>> index 3e22a54..ba54c30 100644
>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>> @@ -130,7 +130,14 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity 
>> *entity)
>>  int i;
>>
>>  for (i = 0; i < entity->num_rq_list; ++i) {
>> -num_jobs = atomic_read(>rq_list[i]->sched->num_jobs);
>> +struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
>> +
>> +if (!entity->rq_list[i]->sched->ready) {
>> +DRM_WARN("sched%s is not ready, skipping", sched->name);
>> +continue;
>> +}
>> +
>> +num_jobs = atomic_read(>num_jobs);
>>  if (num_jobs < min_jobs) {
>>  min_jobs = num_jobs;
>>  rq = entity->rq_list[i];
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index 63b997d..772adec 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -420,6 +420,9 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>  struct drm_gpu_scheduler *sched;
>>
>>  drm_sched_entity_select_rq(entity);
>> +if (!entity->rq)
>> +return -ENOENT;
>> +
>>  sched = entity->rq->sched;
>>
>>  job->sched = sched;
>> @@ -598,6 +601,7 @@ static int drm_sched_main(void *param)
>> * @hang_limit: number of times to allow a job to hang before dropping it
>> * @timeout: timeout value in jiffies for the scheduler
>> * @name: name used for debugging
>> + * @ready: marks if the underlying HW is ready to work
>> *
>> * Return 0 on success, otherwise error code.
>> */
>> @@ -606,7 +610,8 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>> unsigned hw_submission,
>> unsigned hang_limit,
>> long timeout,
>> -   const char *name)
>> +   const char *name,
>> +   bool ready)
> Please drop the ready flag here. We should consider a scheduler ready as
> soon as it is initialized.

I don't totally agree with this, because this flag marks that the HW (the
HW ring) is ready to run, not the scheduler, which is a SW entity.
For amdgpu, drm_sched_init is called from the sw_init stage, while the
ring initialization and tests take place in the hw_init stage. Maybe if
the flag were named 'hw_ready' instead of just 'ready' it would make more
sense?
Also in case there 

Re: [PATCH 2/3] drm/amdgpu: Expose gmc_v9_0_flush_gpu_tlb_helper() for kfd to use

2018-10-23 Thread Koenig, Christian
Am 22.10.18 um 20:41 schrieb Zhao, Yong:
> Change-Id: I3dcd71955297c53b181f82e7078981230c642c01
> Signed-off-by: Yong Zhao 
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 64 
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h |  3 ++
>   2 files changed, 40 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index f35d7a5..6f96545 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -293,14 +293,15 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device 
> *adev)
>   adev->gmc.vm_fault.funcs = _v9_0_irq_funcs;
>   }
>   
> -static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
> +static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
> + uint32_t flush_type)
>   {
>   u32 req = 0;
>   
>   /* invalidate using legacy mode on vmid*/
>   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
>   PER_VMID_INVALIDATE_REQ, 1 << vmid);
> - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
> + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 
> flush_type);
>   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
>   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
>   req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
> @@ -354,32 +355,15 @@ static signed long  
> amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
>   return r;
>   }
>   
> -/*
> - * GART
> - * VMID 0 is the physical GPU addresses as used by the kernel.
> - * VMIDs 1-15 are used for userspace clients and are handled
> - * by the amdgpu vm/hsa code.
> - */
> -
> -/**
> - * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback
> - *
> - * @adev: amdgpu_device pointer
> - * @vmid: vm instance to flush
> - *
> - * Flush the TLB for the requested page table.
> - */
> -static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
> - uint32_t vmid)
> +void gmc_v9_0_flush_gpu_tlb_helper(struct amdgpu_device *adev, uint32_t vmid,
> + uint32_t flush_type, uint32_t eng, bool lock)

This one needs kernel documentation and a better name would be nice.

In general this sounds like a nice cleanup, but exposing ASIC specific
functions is not necessarily a good idea.

We should probably move the ASIC specific KFD code into this file instead.

Christian.

>   {
> - /* Use register 17 for GART */
> - const unsigned eng = 17;
>   unsigned i, j;
>   int r;
>   
>   for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
>   struct amdgpu_vmhub *hub = >vmhub[i];
> - u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
> + u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>   
>   if (adev->gfx.kiq.ring.ready &&
>   (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> @@ -390,7 +374,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev,
>   continue;
>   }
>   
> - spin_lock(>gmc.invalidate_lock);
> + if (lock)
> + spin_lock(>gmc.invalidate_lock);
>   
>   WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
>   
> @@ -403,7 +388,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev,
>   cpu_relax();
>   }
>   if (j < 100) {
> - spin_unlock(>gmc.invalidate_lock);
> + if (lock)
> + spin_unlock(>gmc.invalidate_lock);
>   continue;
>   }
>   
> @@ -416,20 +402,44 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev,
>   udelay(1);
>   }
>   if (j < adev->usec_timeout) {
> - spin_unlock(>gmc.invalidate_lock);
> + if (lock)
> + spin_unlock(>gmc.invalidate_lock);
>   continue;
>   }
> - spin_unlock(>gmc.invalidate_lock);
> + if (lock)
> + spin_unlock(>gmc.invalidate_lock);
>   DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   }
>   }
>   
> +/*
> + * GART
> + * VMID 0 is the physical GPU addresses as used by the kernel.
> + * VMIDs 1-15 are used for userspace clients and are handled
> + * by the amdgpu vm/hsa code.
> + */
> +
> +/**
> + * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback
> + *
> + * @adev: amdgpu_device pointer
> + * @vmid: vm instance to flush
> + *
> + * Flush the TLB for the requested page table.
> + */
> +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
> + uint32_t vmid)
> +{
> + /* Use engine 17 for amdgpu */
> + 

Re: [PATCH v3 2/2] drm/amdgpu: Retire amdgpu_ring.ready flag v3

2018-10-23 Thread Grodzovsky, Andrey


On 10/23/2018 05:23 AM, Christian König wrote:
> Am 22.10.18 um 22:46 schrieb Andrey Grodzovsky:
>> Start using drm_gpu_scheduler.ready instead.
>>
>> v3:
>> Add helper function to run ring test and set
>> sched.ready flag status accordingly, clean explicit
>> sched.ready sets from the IP specific files.
>>
>> Signed-off-by: Andrey Grodzovsky 
>> ---
>
>> [SNIP]
>> +
>> +int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
>
> This needs some kernel doc, with that fixed the patch is Reviewed-by: 
> Christian König 
>
> Did you missed my comment on the first patch?
>
> Thanks for the nice cleanup,
> Christian.

Yeah, it seems like mails with dri-devel in CC occasionally end
up in the dri-devel folder and not in my inbox, even when sent to me.
Will reply soon.

Andrey
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Add DCC flags for GFX9 amdgpu_bo

2018-10-23 Thread Nicholas Kazlauskas
[Why]
Hardware support for Delta Color Compression (DCC) decompression is
available in DC for GFX9 but there's no way for userspace to enable
the feature.

Enabling the feature can provide improved GFX performance and
power savings in many situations.

[How]
Extend the GFX9 tiling flags to include DCC parameters. These are
logically grouped together with tiling flags even if they are
technically distinct.

This trivially maintains backwards compatibility with existing
users of amdgpu_gem_metadata. No new IOCTls or data structures are
needed to support DCC.

This patch helps expose DCC attributes to both libdrm and amdgpu_dm.

Signed-off-by: Nicholas Kazlauskas 
---
 include/uapi/drm/amdgpu_drm.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 6a0d77dcfc47..faaad04814e4 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -329,6 +329,12 @@ struct drm_amdgpu_gem_userptr {
 /* GFX9 and later: */
 #define AMDGPU_TILING_SWIZZLE_MODE_SHIFT   0
 #define AMDGPU_TILING_SWIZZLE_MODE_MASK0x1f
+#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT5
+#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFF
+#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT  29
+#define AMDGPU_TILING_DCC_PITCH_MAX_MASK   0x3FFF
+#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT43
+#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1
 
 /* Set/Get helpers for tiling flags. */
 #define AMDGPU_TILING_SET(field, value) \
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 5/8] drm/amdgpu: fix using shared fence for exported BOs v2

2018-10-23 Thread Michel Dänzer
On 2018-10-04 3:12 p.m., Christian König wrote:
> It is perfectly possible that the BO list is created before the BO is
> exported. While at it cleanup setting shared to one instead of true.

"clean up"


> v2: add comment and simplify logic
> 
> Signed-off-by: Christian König 

As this is a bug fix, should it be before patch 5 and have Cc: stable?
Either way, with the above fixed,

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/8] dma-buf: remove shared fence staging in reservation object

2018-10-23 Thread Michel Dänzer
On 2018-10-23 2:20 p.m., Christian König wrote:
> Ping once more! Adding a few more AMD people.
> 
> Any comments on this?

Patches 1 & 3 are a bit over my head I'm afraid.


Patches 2, 4, 6-8 are

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

2018-10-23 Thread Zhu, Rex
Ok, I will refine the patch as you suggested.


Thanks.


Best Regards

Rex




From: amd-gfx  on behalf of Koenig, 
Christian 
Sent: Tuesday, October 23, 2018 9:05 PM
To: Zhu, Rex; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface


Another concern is more and more parameters may be added to the common 
interface.

Yeah, that is a rather good argument.

Well in this case please add a wrapper for figuring out the VMID and add the 
job before the ib on the parameter list.

Christian.

Am 23.10.18 um 14:55 schrieb Zhu, Rex:

>Yeah, but that can be calculated in the ring specific function. Can't it?


Yes, we should reserve and map the CSA buffer for gfx/compute/sdma together.


Another concern is more and more parameters may be added to the common 
interface.


Best Regards

Rex



From: amd-gfx 

 on behalf of Koenig, Christian 

Sent: Tuesday, October 23, 2018 8:47 PM
To: Zhu, Rex; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

Am 23.10.18 um 14:43 schrieb Zhu, Rex:

>Maybe indeed provide the CSA address as parameter instead. It doesn't
>really matter that it is unused by most IP block implementations.

>Alternative is to use a wrapper function to get the VMID from the job.


I preferred to use a wrapper function to get vmid.


Because if use CSA address as parameter, we need to check which ring it is.

The csa address is different between sdma/gfx/compute.

Yeah, but that can be calculated in the ring specific function. Can't it?

I mean if I'm not completely mistaken it is just an offset which needs to be 
added to the base CSA address.

Christian.



Best Regards

Rex







From: Christian König 

Sent: Tuesday, October 23, 2018 8:09 PM
To: Zhu, Rex; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

Am 23.10.18 um 14:01 schrieb Rex Zhu:
> use the pointer of struct amdgpu_job as the function
> argument instead of vmid, so the other members of
> struct amdgpu_job can be visited in emit_ib function.
>
> Signed-off-by: Rex Zhu 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  3 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c|  3 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c|  3 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c|  6 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  6 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 24 +---
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/si_dma.c  |  3 ++-
>   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c|  2 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c|  2 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c|  8 ++--
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c|  7 +--
>   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c|  4 +++-
>   drivers/gpu/drm/amd/amdgpu/vce_v4_0.c|  4 +++-
>   drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 10 +++---
>   20 files changed, 66 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index b8963b7..0b227ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
> num_ibs,
>!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, 
> Preamble CE ib must be inserted anyway */
>continue;
>
> - amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
> - need_ctx_switch);
> + amdgpu_ring_emit_ib(ring, ib, job, need_ctx_switch);
>need_ctx_switch = false;
>}
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 3cb7fb8..0f0f8fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -229,7 +229,7 @@ struct amdgpu_ring {
>   #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
>   #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
>   #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
> -#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), 
> (vmid), (c))
> +#define amdgpu_ring_emit_ib(r, ib, job, c) ((r)->funcs->emit_ib((r), (ib), 
> (job), 

Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

2018-10-23 Thread Koenig, Christian
Another concern is more and more parameters may be added to the common 
interface.

Yeah, that is a rather good argument.

Well in this case please add a wrapper for figuring out the VMID and add the 
job before the ib on the parameter list.

Christian.

Am 23.10.18 um 14:55 schrieb Zhu, Rex:

>Yeah, but that can be calculated in the ring specific function. Can't it?


Yes, we should reserve and map the CSA buffer for gfx/compute/sdma together.


Another concern is more and more parameters may be added to the common 
interface.


Best Regards

Rex



From: amd-gfx 

 on behalf of Koenig, Christian 

Sent: Tuesday, October 23, 2018 8:47 PM
To: Zhu, Rex; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

Am 23.10.18 um 14:43 schrieb Zhu, Rex:

>Maybe indeed provide the CSA address as parameter instead. It doesn't
>really matter that it is unused by most IP block implementations.

>Alternative is to use a wrapper function to get the VMID from the job.


I preferred to use a wrapper function to get vmid.


Because if use CSA address as parameter, we need to check which ring it is.

The csa address is different between sdma/gfx/compute.

Yeah, but that can be calculated in the ring specific function. Can't it?

I mean if I'm not completely mistaken it is just an offset which needs to be 
added to the base CSA address.

Christian.



Best Regards

Rex







From: Christian König 

Sent: Tuesday, October 23, 2018 8:09 PM
To: Zhu, Rex; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

Am 23.10.18 um 14:01 schrieb Rex Zhu:
> use the pointer of struct amdgpu_job as the function
> argument instead of vmid, so the other members of
> struct amdgpu_job can be visited in emit_ib function.
>
> Signed-off-by: Rex Zhu 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  3 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c|  3 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c|  3 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c|  6 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  6 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 24 +---
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/si_dma.c  |  3 ++-
>   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c|  2 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c|  2 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c|  8 ++--
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c|  7 +--
>   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c|  4 +++-
>   drivers/gpu/drm/amd/amdgpu/vce_v4_0.c|  4 +++-
>   drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 10 +++---
>   20 files changed, 66 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index b8963b7..0b227ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
> num_ibs,
>!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, 
> Preamble CE ib must be inserted anyway */
>continue;
>
> - amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
> - need_ctx_switch);
> + amdgpu_ring_emit_ib(ring, ib, job, need_ctx_switch);
>need_ctx_switch = false;
>}
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 3cb7fb8..0f0f8fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -229,7 +229,7 @@ struct amdgpu_ring {
>   #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
>   #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
>   #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
> -#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), 
> (vmid), (c))
> +#define amdgpu_ring_emit_ib(r, ib, job, c) ((r)->funcs->emit_ib((r), (ib), 
> (job), (c)))
>   #define amdgpu_ring_emit_pipeline_sync(r) 
> (r)->funcs->emit_pipeline_sync((r))
>   #define amdgpu_ring_emit_vm_flush(r, vmid, addr) 
> (r)->funcs->emit_vm_flush((r), (vmid), (addr))
>   #define amdgpu_ring_emit_fence(r, addr, seq, flags) 
> (r)->funcs->emit_fence((r), (addr), (seq), (flags))
> diff --git 

Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

2018-10-23 Thread Zhu, Rex
>Yeah, but that can be calculated in the ring specific function. Can't it?


Yes, we should reserve and map the CSA buffer for gfx/compute/sdma together.


Another concern is more and more parameters may be added to the common 
interface.


Best Regards

Rex



From: amd-gfx  on behalf of Koenig, 
Christian 
Sent: Tuesday, October 23, 2018 8:47 PM
To: Zhu, Rex; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

Am 23.10.18 um 14:43 schrieb Zhu, Rex:

>Maybe indeed provide the CSA address as parameter instead. It doesn't
>really matter that it is unused by most IP block implementations.

>Alternative is to use a wrapper function to get the VMID from the job.


I preferred to use a wrapper function to get vmid.


Because if use CSA address as parameter, we need to check which ring it is.

The csa address is different between sdma/gfx/compute.

Yeah, but that can be calculated in the ring specific function. Can't it?

I mean if I'm not completely mistaken it is just an offset which needs to be 
added to the base CSA address.

Christian.



Best Regards

Rex







From: Christian König 

Sent: Tuesday, October 23, 2018 8:09 PM
To: Zhu, Rex; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

Am 23.10.18 um 14:01 schrieb Rex Zhu:
> use the pointer of struct amdgpu_job as the function
> argument instead of vmid, so the other members of
> struct amdgpu_job can be visited in emit_ib function.
>
> Signed-off-by: Rex Zhu 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  3 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c|  3 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c|  3 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c|  6 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  6 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 24 +---
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/si_dma.c  |  3 ++-
>   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c|  2 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c|  2 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c|  8 ++--
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c|  7 +--
>   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c|  4 +++-
>   drivers/gpu/drm/amd/amdgpu/vce_v4_0.c|  4 +++-
>   drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 10 +++---
>   20 files changed, 66 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index b8963b7..0b227ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
> num_ibs,
>!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, 
> Preamble CE ib must be inserted anyway */
>continue;
>
> - amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
> - need_ctx_switch);
> + amdgpu_ring_emit_ib(ring, ib, job, need_ctx_switch);
>need_ctx_switch = false;
>}
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 3cb7fb8..0f0f8fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -229,7 +229,7 @@ struct amdgpu_ring {
>   #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
>   #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
>   #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
> -#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), 
> (vmid), (c))
> +#define amdgpu_ring_emit_ib(r, ib, job, c) ((r)->funcs->emit_ib((r), (ib), 
> (job), (c)))
>   #define amdgpu_ring_emit_pipeline_sync(r) 
> (r)->funcs->emit_pipeline_sync((r))
>   #define amdgpu_ring_emit_vm_flush(r, vmid, addr) 
> (r)->funcs->emit_vm_flush((r), (vmid), (addr))
>   #define amdgpu_ring_emit_fence(r, addr, seq, flags) 
> (r)->funcs->emit_fence((r), (addr), (seq), (flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index 84dd550..8f98641 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -1032,7 +1032,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser 
> *p, uint32_t ib_idx)
>*
>*/
>   void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
> -

Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

2018-10-23 Thread Koenig, Christian
Am 23.10.18 um 14:43 schrieb Zhu, Rex:

>Maybe indeed provide the CSA address as parameter instead. It doesn't
>really matter that it is unused by most IP block implementations.

>Alternative is to use a wrapper function to get the VMID from the job.


I preferred to use a wrapper function to get vmid.


Because if use CSA address as parameter, we need to check which ring it is.

The csa address is different between sdma/gfx/compute.

Yeah, but that can be calculated in the ring specific function. Can't it?

I mean if I'm not completely mistaken it is just an offset which needs to be 
added to the base CSA address.

Christian.



Best Regards

Rex







From: Christian König 

Sent: Tuesday, October 23, 2018 8:09 PM
To: Zhu, Rex; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

Am 23.10.18 um 14:01 schrieb Rex Zhu:
> use the pointer of struct amdgpu_job as the function
> argument instead of vmid, so the other members of
> struct amdgpu_job can be visited in emit_ib function.
>
> Signed-off-by: Rex Zhu 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  3 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c|  3 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c|  3 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c|  6 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  6 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 24 +---
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/si_dma.c  |  3 ++-
>   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c|  2 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c|  2 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c|  8 ++--
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c|  7 +--
>   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c|  4 +++-
>   drivers/gpu/drm/amd/amdgpu/vce_v4_0.c|  4 +++-
>   drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 10 +++---
>   20 files changed, 66 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index b8963b7..0b227ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
> num_ibs,
>!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, 
> Preamble CE ib must be inserted anyway */
>continue;
>
> - amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
> - need_ctx_switch);
> + amdgpu_ring_emit_ib(ring, ib, job, need_ctx_switch);
>need_ctx_switch = false;
>}
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 3cb7fb8..0f0f8fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -229,7 +229,7 @@ struct amdgpu_ring {
>   #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
>   #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
>   #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
> -#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), 
> (vmid), (c))
> +#define amdgpu_ring_emit_ib(r, ib, job, c) ((r)->funcs->emit_ib((r), (ib), 
> (job), (c)))
>   #define amdgpu_ring_emit_pipeline_sync(r) 
> (r)->funcs->emit_pipeline_sync((r))
>   #define amdgpu_ring_emit_vm_flush(r, vmid, addr) 
> (r)->funcs->emit_vm_flush((r), (vmid), (addr))
>   #define amdgpu_ring_emit_fence(r, addr, seq, flags) 
> (r)->funcs->emit_fence((r), (addr), (seq), (flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index 84dd550..8f98641 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -1032,7 +1032,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser 
> *p, uint32_t ib_idx)
>*
>*/
>   void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
> -  unsigned vmid, bool ctx_switch)
> + struct amdgpu_job *job, bool ctx_switch)

The indentation here looks wrong on first glance.

Additional to that I would put the job before the IB in the parameter
list because it is the containing object.

>   {
>amdgpu_ring_write(ring, VCE_CMD_IB);
>amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h 
> 

Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

2018-10-23 Thread Zhu, Rex
>Maybe indeed provide the CSA address as parameter instead. It doesn't
>really matter that it is unused by most IP block implementations.

>Alternative is to use a wrapper function to get the VMID from the job.


I preferred to use a wrapper function to get vmid.


Because if use CSA address as parameter, we need to check which ring it is.

The csa address is different between sdma/gfx/compute.


Best Regards

Rex







From: Christian König 
Sent: Tuesday, October 23, 2018 8:09 PM
To: Zhu, Rex; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

Am 23.10.18 um 14:01 schrieb Rex Zhu:
> use the pointer of struct amdgpu_job as the function
> argument instead of vmid, so the other members of
> struct amdgpu_job can be visited in emit_ib function.
>
> Signed-off-by: Rex Zhu 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  3 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c|  3 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c|  3 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c|  6 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  6 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 24 +---
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/si_dma.c  |  3 ++-
>   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c|  2 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c|  2 +-
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c|  8 ++--
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c|  7 +--
>   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c|  4 +++-
>   drivers/gpu/drm/amd/amdgpu/vce_v4_0.c|  4 +++-
>   drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 10 +++---
>   20 files changed, 66 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index b8963b7..0b227ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
> num_ibs,
>!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, 
> Preamble CE ib must be inserted anyway */
>continue;
>
> - amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
> - need_ctx_switch);
> + amdgpu_ring_emit_ib(ring, ib, job, need_ctx_switch);
>need_ctx_switch = false;
>}
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 3cb7fb8..0f0f8fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -229,7 +229,7 @@ struct amdgpu_ring {
>   #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
>   #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
>   #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
> -#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), 
> (vmid), (c))
> +#define amdgpu_ring_emit_ib(r, ib, job, c) ((r)->funcs->emit_ib((r), (ib), 
> (job), (c)))
>   #define amdgpu_ring_emit_pipeline_sync(r) 
> (r)->funcs->emit_pipeline_sync((r))
>   #define amdgpu_ring_emit_vm_flush(r, vmid, addr) 
> (r)->funcs->emit_vm_flush((r), (vmid), (addr))
>   #define amdgpu_ring_emit_fence(r, addr, seq, flags) 
> (r)->funcs->emit_fence((r), (addr), (seq), (flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index 84dd550..8f98641 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -1032,7 +1032,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser 
> *p, uint32_t ib_idx)
>*
>*/
>   void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
> -  unsigned vmid, bool ctx_switch)
> + struct amdgpu_job *job, bool ctx_switch)

The indentation here looks wrong on first glance.

Additional to that I would put the job before the IB in the parameter
list because it is the containing object.

>   {
>amdgpu_ring_write(ring, VCE_CMD_IB);
>amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> index a1f209e..06d6d87 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> @@ -66,7 +66,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, 
> uint32_t handle,
>   int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
>   int 

Re: [PATCH 1/8] dma-buf: remove shared fence staging in reservation object

2018-10-23 Thread Christian König

Ping once more! Adding a few more AMD people.

Any comments on this?

Thanks,
Christian.

Am 12.10.18 um 10:22 schrieb Christian König:

Ping! Adding a few people directly and more mailing lists.

Can I get an acked-by/rb for this? It's only a cleanup and not much 
functional change.


Regards,
Christian.

Am 04.10.2018 um 15:12 schrieb Christian König:

No need for that any more. Just replace the list when there isn't enough
room any more for the additional fence.

Signed-off-by: Christian König 
---
  drivers/dma-buf/reservation.c | 178 
++

  include/linux/reservation.h   |   4 -
  2 files changed, 58 insertions(+), 124 deletions(-)

diff --git a/drivers/dma-buf/reservation.c 
b/drivers/dma-buf/reservation.c

index 6c95f61a32e7..5825fc336a13 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -68,105 +68,23 @@ EXPORT_SYMBOL(reservation_seqcount_string);
   */
  int reservation_object_reserve_shared(struct reservation_object *obj)
  {
-    struct reservation_object_list *fobj, *old;
-    u32 max;
+    struct reservation_object_list *old, *new;
+    unsigned int i, j, k, max;
    old = reservation_object_get_list(obj);
    if (old && old->shared_max) {
-    if (old->shared_count < old->shared_max) {
-    /* perform an in-place update */
-    kfree(obj->staged);
-    obj->staged = NULL;
+    if (old->shared_count < old->shared_max)
  return 0;
-    } else
+    else
  max = old->shared_max * 2;
-    } else
-    max = 4;
-
-    /*
- * resize obj->staged or allocate if it doesn't exist,
- * noop if already correct size
- */
-    fobj = krealloc(obj->staged, offsetof(typeof(*fobj), shared[max]),
-    GFP_KERNEL);
-    if (!fobj)
-    return -ENOMEM;
-
-    obj->staged = fobj;
-    fobj->shared_max = max;
-    return 0;
-}
-EXPORT_SYMBOL(reservation_object_reserve_shared);
-
-static void
-reservation_object_add_shared_inplace(struct reservation_object *obj,
-  struct reservation_object_list *fobj,
-  struct dma_fence *fence)
-{
-    struct dma_fence *signaled = NULL;
-    u32 i, signaled_idx;
-
-    dma_fence_get(fence);
-
-    preempt_disable();
-    write_seqcount_begin(>seq);
-
-    for (i = 0; i < fobj->shared_count; ++i) {
-    struct dma_fence *old_fence;
-
-    old_fence = rcu_dereference_protected(fobj->shared[i],
-    reservation_object_held(obj));
-
-    if (old_fence->context == fence->context) {
-    /* memory barrier is added by write_seqcount_begin */
-    RCU_INIT_POINTER(fobj->shared[i], fence);
-    write_seqcount_end(>seq);
-    preempt_enable();
-
-    dma_fence_put(old_fence);
-    return;
-    }
-
-    if (!signaled && dma_fence_is_signaled(old_fence)) {
-    signaled = old_fence;
-    signaled_idx = i;
-    }
-    }
-
-    /*
- * memory barrier is added by write_seqcount_begin,
- * fobj->shared_count is protected by this lock too
- */
-    if (signaled) {
-    RCU_INIT_POINTER(fobj->shared[signaled_idx], fence);
  } else {
-    BUG_ON(fobj->shared_count >= fobj->shared_max);
- RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
-    fobj->shared_count++;
+    max = 4;
  }
  -    write_seqcount_end(>seq);
-    preempt_enable();
-
-    dma_fence_put(signaled);
-}
-
-static void
-reservation_object_add_shared_replace(struct reservation_object *obj,
-  struct reservation_object_list *old,
-  struct reservation_object_list *fobj,
-  struct dma_fence *fence)
-{
-    unsigned i, j, k;
-
-    dma_fence_get(fence);
-
-    if (!old) {
-    RCU_INIT_POINTER(fobj->shared[0], fence);
-    fobj->shared_count = 1;
-    goto done;
-    }
+    new = kmalloc(offsetof(typeof(*new), shared[max]), GFP_KERNEL);
+    if (!new)
+    return -ENOMEM;
    /*
   * no need to bump fence refcounts, rcu_read access
@@ -174,46 +92,45 @@ reservation_object_add_shared_replace(struct 
reservation_object *obj,

   * references from the old struct are carried over to
   * the new.
   */
-    for (i = 0, j = 0, k = fobj->shared_max; i < old->shared_count; 
++i) {

-    struct dma_fence *check;
+    for (i = 0, j = 0, k = max; i < (old ? old->shared_count : 0); 
++i) {

+    struct dma_fence *fence;
  -    check = rcu_dereference_protected(old->shared[i],
-    reservation_object_held(obj));
-
-    if (check->context == fence->context ||
-    dma_fence_is_signaled(check))
-    RCU_INIT_POINTER(fobj->shared[--k], check);
+    fence = rcu_dereference_protected(old->shared[i],
+  reservation_object_held(obj));
+    if (dma_fence_is_signaled(fence))
+    RCU_INIT_POINTER(new->shared[--k], 

Re: [PATCH] drm/amdgpu: Modify the argument of emit_ib interface

2018-10-23 Thread Christian König

Am 23.10.18 um 14:01 schrieb Rex Zhu:

use the pointer of struct amdgpu_job as the function
argument instead of vmid, so the other members of
struct amdgpu_job can be visited in emit_ib function.

Signed-off-by: Rex Zhu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  3 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h  |  2 +-
  drivers/gpu/drm/amd/amdgpu/cik_sdma.c|  3 ++-
  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c|  3 ++-
  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c|  6 --
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  6 --
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 24 +---
  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   |  4 +++-
  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   |  4 +++-
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  4 +++-
  drivers/gpu/drm/amd/amdgpu/si_dma.c  |  3 ++-
  drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c|  2 +-
  drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c|  2 +-
  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c|  8 ++--
  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c|  7 +--
  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c|  4 +++-
  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c|  4 +++-
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 10 +++---
  20 files changed, 66 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b8963b7..0b227ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, 
Preamble CE ib must be inserted anyway */
continue;
  
-		amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,

-   need_ctx_switch);
+   amdgpu_ring_emit_ib(ring, ib, job, need_ctx_switch);
need_ctx_switch = false;
}
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 3cb7fb8..0f0f8fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -229,7 +229,7 @@ struct amdgpu_ring {
  #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
  #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
  #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
-#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), 
(vmid), (c))
+#define amdgpu_ring_emit_ib(r, ib, job, c) ((r)->funcs->emit_ib((r), (ib), 
(job), (c)))
  #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
  #define amdgpu_ring_emit_vm_flush(r, vmid, addr) 
(r)->funcs->emit_vm_flush((r), (vmid), (addr))
  #define amdgpu_ring_emit_fence(r, addr, seq, flags) 
(r)->funcs->emit_fence((r), (addr), (seq), (flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 84dd550..8f98641 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -1032,7 +1032,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser 
*p, uint32_t ib_idx)
   *
   */
  void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
-unsigned vmid, bool ctx_switch)
+   struct amdgpu_job *job, bool ctx_switch)


The indentation here looks wrong at first glance.

In addition to that I would put the job before the IB in the parameter 
list because it is the containing object.



  {
amdgpu_ring_write(ring, VCE_CMD_IB);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index a1f209e..06d6d87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -66,7 +66,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, 
uint32_t handle,
  int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
  int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
  void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
-unsigned vmid, bool ctx_switch);
+   struct amdgpu_job *job, bool ctx_switch);
  void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags);
  int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 32eb43d..70d4419 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -219,8 +219,9 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring 
*ring, uint32_t count)
   */
  static void 

[PATCH] drm/amdgpu: Modify the argument of emit_ib interface

2018-10-23 Thread Rex Zhu
use the pointer to struct amdgpu_job as the function
argument instead of vmid, so the other members of
struct amdgpu_job can be visited in the emit_ib function.

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h  |  2 +-
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c|  3 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c|  3 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c|  6 --
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  6 --
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 24 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   |  4 +++-
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   |  4 +++-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  4 +++-
 drivers/gpu/drm/amd/amdgpu/si_dma.c  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c|  8 ++--
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c|  7 +--
 drivers/gpu/drm/amd/amdgpu/vce_v3_0.c|  4 +++-
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c|  4 +++-
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c| 10 +++---
 20 files changed, 66 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b8963b7..0b227ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, 
Preamble CE ib must be inserted anyway */
continue;
 
-   amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
-   need_ctx_switch);
+   amdgpu_ring_emit_ib(ring, ib, job, need_ctx_switch);
need_ctx_switch = false;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 3cb7fb8..0f0f8fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -229,7 +229,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
 #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
 #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
-#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), 
(vmid), (c))
+#define amdgpu_ring_emit_ib(r, ib, job, c) ((r)->funcs->emit_ib((r), (ib), 
(job), (c)))
 #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
 #define amdgpu_ring_emit_vm_flush(r, vmid, addr) 
(r)->funcs->emit_vm_flush((r), (vmid), (addr))
 #define amdgpu_ring_emit_fence(r, addr, seq, flags) 
(r)->funcs->emit_fence((r), (addr), (seq), (flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 84dd550..8f98641 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -1032,7 +1032,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser 
*p, uint32_t ib_idx)
  *
  */
 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
-unsigned vmid, bool ctx_switch)
+   struct amdgpu_job *job, bool ctx_switch)
 {
amdgpu_ring_write(ring, VCE_CMD_IB);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index a1f209e..06d6d87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -66,7 +66,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, 
uint32_t handle,
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
 int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
-unsigned vmid, bool ctx_switch);
+   struct amdgpu_job *job, bool ctx_switch);
 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags);
 int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 32eb43d..70d4419 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -219,8 +219,9 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring 
*ring, uint32_t count)
  */
 static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
  struct amdgpu_ib *ib,
- unsigned vmid, bool ctx_switch)
+ struct amdgpu_job *job, bool ctx_switch)
 {
+ 

Re: [PATCH v3 2/2] drm/amdgpu: Retire amdgpu_ring.ready flag v3

2018-10-23 Thread Christian König

Am 22.10.18 um 22:46 schrieb Andrey Grodzovsky:

Start using drm_gpu_scheduler.ready instead.

v3:
Add helper function to run ring test and set
sched.ready flag status accordingly, clean explicit
sched.ready sets from the IP specific files.

Signed-off-by: Andrey Grodzovsky 
---



[SNIP]
+
+int amdgpu_ring_test_helper(struct amdgpu_ring *ring)


This needs some kernel doc, with that fixed the patch is Reviewed-by: 
Christian König 


Did you miss my comment on the first patch?

Thanks for the nice cleanup,
Christian.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/5] drm/ttm: use a static ttm_mem_global instance

2018-10-23 Thread Christian König

Am 23.10.18 um 08:22 schrieb Thomas Zimmermann:

Hi Christian

Am 19.10.18 um 18:41 schrieb Christian König:

As the name says we only need one global instance of ttm_mem_global.

Drop all the driver initialization and just use a single exported
instance which is initialized during BO global initialization.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 44 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  1 -
  drivers/gpu/drm/ast/ast_drv.h   |  1 -
  drivers/gpu/drm/ast/ast_ttm.c   | 32 ++
  drivers/gpu/drm/bochs/bochs.h   |  1 -
  drivers/gpu/drm/bochs/bochs_mm.c| 30 ++---
  drivers/gpu/drm/cirrus/cirrus_drv.h |  1 -
  drivers/gpu/drm/cirrus/cirrus_ttm.c | 32 ++
  drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h |  1 -
  drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c | 31 +++--
  drivers/gpu/drm/mgag200/mgag200_drv.h   |  1 -
  drivers/gpu/drm/mgag200/mgag200_ttm.c   | 32 ++
  drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 -
  drivers/gpu/drm/nouveau/nouveau_ttm.c   | 34 ++-
  drivers/gpu/drm/qxl/qxl_drv.h   |  1 -
  drivers/gpu/drm/qxl/qxl_ttm.c   | 28 
  drivers/gpu/drm/radeon/radeon.h |  1 -
  drivers/gpu/drm/radeon/radeon_ttm.c | 26 ---
  drivers/gpu/drm/ttm/ttm_bo.c| 10 --
  drivers/gpu/drm/ttm/ttm_memory.c|  5 +--
  drivers/gpu/drm/virtio/virtgpu_drv.h|  1 -
  drivers/gpu/drm/virtio/virtgpu_ttm.c| 27 ---
  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  4 +--
  drivers/gpu/drm/vmwgfx/vmwgfx_drv.h |  3 +-
  drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c| 27 ---
  drivers/staging/vboxvideo/vbox_drv.h|  1 -
  drivers/staging/vboxvideo/vbox_ttm.c| 24 --
  include/drm/ttm/ttm_bo_driver.h |  8 ++---
  include/drm/ttm/ttm_memory.h|  4 +--
  29 files changed, 32 insertions(+), 380 deletions(-)

Great that you removed all the global TTM state from all the drivers.
This removes a lot of duplication and simplifies driver development a bit.



diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9edece6510d3..3006050b1720 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1526,18 +1526,22 @@ void ttm_bo_global_release(struct ttm_bo_global *glob)
  {
kobject_del(>kobj);
kobject_put(>kobj);
+   ttm_mem_global_release(_mem_glob);
  }
  EXPORT_SYMBOL(ttm_bo_global_release);
  
-int ttm_bo_global_init(struct ttm_bo_global *glob,

-  struct ttm_mem_global *mem_glob)
+int ttm_bo_global_init(struct ttm_bo_global *glob)
  {
int ret;
unsigned i;
  
+	ret = ttm_mem_global_init(_mem_glob);

+   if (ret)
+   return ret;
+

What I really dislike about this patch set is that it mixes state and
implementation into that same functions. The original code had a fairly
good separation of both. Now the mechanisms and policies are located in
the same places.[^1]

This looks like a simplification, but from my experience, such code is a
setup for long-term maintenance problems. For example, I can imagine
that someone at some point wants multiple global buffers (e.g., on a
NUMA-like architecture).


That is exactly the thinking which messed up things in the first place. 
Keep in mind that this is the kernel and not a userspace C++ project.


Handling this as global state in a module is perfectly fine and we already 
support NUMA use cases with this. E.g. if we had this state 
multiple times I would actually consider this a bug.


So pushing this into a structure makes it much more error prone and 
doesn't buy us anything.


In the long term I'm actually thinking about getting completely rid of 
the ttm_mem_global and ttm_bo_global and just use static variables for this.


Regards,
Christian.



I understand that I'm new here, have no say, and probably don't get the
big picture, but from my point of view, this is not a forward-thinking
change.

Best regards
Thomas

[^1] More philosophically speaking, program state can be global, but
data structures can only be share-able. ttm_mem_global and ttm_bo_global
should be renamed to something like ttm_shared_mem, rsp. ttm_shared_bo.



mutex_init(>device_list_mutex);
spin_lock_init(>lru_lock);
-   glob->mem_glob = mem_glob;
+   glob->mem_glob = _mem_glob;
glob->mem_glob->bo_glob = glob;
glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
  
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c

index 450387c92b63..7704e17c402f 100644
--- 

Re: [PATCH] drm/amdgpu: Fix amdgpu_vm_alloc_pts failed

2018-10-23 Thread Zhu, Rex
Hi Christian,


You can reproduce this issue by allocate CSA buffer in baremetal and map them 
to reserved VM address.


Please see the attached patch.


Best Regards

Rex




From: Christian König 
Sent: Tuesday, October 23, 2018 5:01 PM
To: Zhu, Rex; Zhang, Jerry; amd-gfx@lists.freedesktop.org; Deucher, Alexander; 
Koenig, Christian
Subject: Re: [PATCH] drm/amdgpu: Fix amdgpu_vm_alloc_pts failed

Hi guys,

yeah the root PD doesn't necessarily have a power of two entries.

But what exactly was the problem with the original code? Why doesn't 
0xffffffff work?

The only possible explanation I can see is that somebody tried to use an 
address which is above max_pfn, or how did that trigger?

Thanks,
Christian.

Am 23.10.18 um 07:42 schrieb Zhu, Rex:

No, if the vm size is small, there may be only one root pd entry.

we need to make sure the mask >= 0;


Maybe this change revert Christian's commit:


commit 72af632549b97ead9251bb155f08fefd1fb6f5c3
Author: Christian König 

Date:   Sat Sep 15 10:02:13 2018 +0200

drm/amdgpu: add amdgpu_vm_entries_mask v2

We can't get the mask for the root directory from the number of entries.

So add a new function to avoid that problem.


Best Regards

Rex



From: amd-gfx 

 on behalf of Zhang, Jerry(Junwei) 

Sent: Tuesday, October 23, 2018 1:12 PM
To: Zhu, Rex; 
amd-gfx@lists.freedesktop.org; Deucher, 
Alexander; Koenig, Christian
Subject: Re: [PATCH] drm/amdgpu: Fix amdgpu_vm_alloc_pts failed

On 10/23/2018 11:29 AM, Rex Zhu wrote:
> when the VA address is located in the last PD entries,
> the alloc_pts will fail.
>
> Use the right PD mask instead of the hardcoded value, suggested
> by jerry.zhang.
>
> Signed-off-by: Rex Zhu 

Thanks for verifying that.
Feel free to add
Reviewed-by: Junwei Zhang 

Also like to get to know some background about these two functions from
Christian.
Perhaps we may make it more simple, e.g. merging them together.

Regards,
Jerry

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 -
>   1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 054633b..3939013 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -202,8 +202,11 @@ static unsigned amdgpu_vm_num_entries(struct 
> amdgpu_device *adev,
>   static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev,
>   unsigned int level)
>   {
> + unsigned shift = amdgpu_vm_level_shift(adev,
> +adev->vm_manager.root_level);
> +
>if (level <= adev->vm_manager.root_level)
> - return 0x;
> + return (round_up(adev->vm_manager.max_pfn, 1 << shift) >> 
> shift) - 1;
>else if (level != AMDGPU_VM_PTB)
>return 0x1ff;
>else

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
amd-gfx Info Page - 
freedesktop.org
lists.freedesktop.org
To see the collection of prior postings to the list, visit the amd-gfx 
Archives.. Using amd-gfx: To post a message to all the list members, send email 
to amd-gfx@lists.freedesktop.org. You can 
subscribe to the list, or change your existing subscription, in the sections 
below.





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


From a68c029e2bc5f60fd26d1b0705fee2be80b6f604 Mon Sep 17 00:00:00 2001
From: Rex Zhu 
Date: Tue, 23 Oct 2018 17:07:12 +0800
Subject: [PATCH] debug: create and map csa buffer

---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 8 
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 450b0b7..e0d46b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1658,7 +1658,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 			adev->ip_blocks[i].status.hw = true;
 
 			/* right after GMC hw init, we create CSA */
-			if (amdgpu_sriov_vf(adev)) {
+//			if (amdgpu_sriov_vf(adev)) {
 r = amdgpu_allocate_static_csa(adev, >virt.csa_obj,
 AMDGPU_GEM_DOMAIN_VRAM,
 AMDGPU_CSA_SIZE);
@@ -1666,7 +1666,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	

Re: [PATCH] drm/amdgpu: Fix amdgpu_vm_alloc_pts failed

2018-10-23 Thread Christian König

Hi guys,

yeah the root PD doesn't necessarily have a power of two entries.

But what exactly was the problem with the original code? Why doesn't 
0xffffffff work?


The only possible explanation I can see is that somebody tried to use an 
address which is above max_pfn, or how did that trigger?


Thanks,
Christian.

Am 23.10.18 um 07:42 schrieb Zhu, Rex:


No, if the vm size is small, there may be only one root pd entry.

we need to make sure the mask >= 0;


Maybe this change revert Christian's commit:


commit 72af632549b97ead9251bb155f08fefd1fb6f5c3
Author: Christian König 
Date:   Sat Sep 15 10:02:13 2018 +0200

    drm/amdgpu: add amdgpu_vm_entries_mask v2

    We can't get the mask for the root directory from the number of 
entries.


    So add a new function to avoid that problem.


Best Regards

Rex




*From:* amd-gfx  on behalf of 
Zhang, Jerry(Junwei) 

*Sent:* Tuesday, October 23, 2018 1:12 PM
*To:* Zhu, Rex; amd-gfx@lists.freedesktop.org; Deucher, Alexander; 
Koenig, Christian

*Subject:* Re: [PATCH] drm/amdgpu: Fix amdgpu_vm_alloc_pts failed
On 10/23/2018 11:29 AM, Rex Zhu wrote:
> when the VA address is located in the last PD entries,
> the alloc_pts will fail.
>
> Use the right PD mask instead of the hardcoded value, suggested
> by jerry.zhang.
>
> Signed-off-by: Rex Zhu 

Thanks for verifying that.
Feel free to add
Reviewed-by: Junwei Zhang 

Also like to get to know some background about these two functions from
Christian.
Perhaps we may make it more simple, e.g. merging them together.

Regards,
Jerry

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 -
>   1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> index 054633b..3939013 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -202,8 +202,11 @@ static unsigned amdgpu_vm_num_entries(struct 
amdgpu_device *adev,

>   static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev,
>   unsigned int level)
>   {
> + unsigned shift = amdgpu_vm_level_shift(adev,
> + adev->vm_manager.root_level);
> +
>    if (level <= adev->vm_manager.root_level)
> - return 0x;
> + return (round_up(adev->vm_manager.max_pfn, 1 << shift) 
>> shift) - 1;

>    else if (level != AMDGPU_VM_PTB)
>    return 0x1ff;
>    else

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
amd-gfx Info Page - freedesktop.org 


lists.freedesktop.org
To see the collection of prior postings to the list, visit the amd-gfx 
Archives.. Using amd-gfx: To post a message to all the list members, 
send email to amd-gfx@lists.freedesktop.org. You can subscribe to the 
list, or change your existing subscription, in the sections below.




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] mm: convert totalram_pages, totalhigh_pages and managed_pages to atomic.

2018-10-23 Thread Joe Perches
On Mon, 2018-10-22 at 22:53 +0530, Arun KS wrote:
> Remove managed_page_count_lock spinlock and instead use atomic
> variables.

Perhaps better to define and use macros for the accesses
instead of specific uses of atomic_long_

Something like:

#define totalram_pages()(unsigned 
long)atomic_long_read(&_totalram_pages)
#define totalram_pages_inc()(unsigned long)atomic_long_inc(&_totalram_pages)
#define totalram_pages_dec()(unsigned long)atomic_long_dec(&_totalram_pages)

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] mm: convert totalram_pages, totalhigh_pages and managed_pages to atomic.

2018-10-23 Thread Arun KS

On 2018-10-23 09:45, Joe Perches wrote:

On Mon, 2018-10-22 at 22:53 +0530, Arun KS wrote:

Remove managed_page_count_lock spinlock and instead use atomic
variables.




Hello Joe,

Perhaps better to define and use macros for the accesses
instead of specific uses of atomic_long_

Something like:

#define totalram_pages()	(unsigned 
long)atomic_long_read(&_totalram_pages)
#define totalram_pages_inc()	(unsigned 
long)atomic_long_inc(&_totalram_pages)
#define totalram_pages_dec()	(unsigned 
long)atomic_long_dec(&_totalram_pages)


That sounds like a nice idea.

Regards,
Arun
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] mm: convert totalram_pages, totalhigh_pages and managed_pages to atomic.

2018-10-23 Thread Huang, Ying
Arun KS  writes:

> Remove managed_page_count_lock spinlock and instead use atomic
> variables.
>
> Suggested-by: Michal Hocko 
> Suggested-by: Vlastimil Babka 
> Signed-off-by: Arun KS 
>
> ---
> As discussed here,
> https://patchwork.kernel.org/patch/10627521/#22261253

My 2 cents.  I think you should include at least part of the discussion
in the patch description to make it more readable by itself.

Best Regards,
Huang, Ying
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/3] drm/amdkfd: Remove unnecessary register setting when invalidating tlb in kfd

2018-10-23 Thread Kuehling, Felix
Patch 1 is Reviewed-by: Felix Kuehling 

Patch 2: I'm not sure we need the "lock" parameter and the invalidation
engine parameter. If we're serious about consolidating TLB invalidation
between amdgpu and KFD, I think we should use the same invalidation
engine and the same lock. Then you also don't need to take the
adev->srbm_mutex any more in write_vmid_invalidate_request, which we
were abusing for this purpose.

Regards,
  Felix


On 2018-10-22 2:41 p.m., Zhao, Yong wrote:
> Those register settings have been done in gfxhub_v1_0_program_invalidation()
> and mmhub_v1_0_program_invalidation().
>
> Change-Id: I9b9b44f17ac2a6ff0c9c78f91885665da75543d0
> Signed-off-by: Yong Zhao 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 17 -
>  1 file changed, 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 60b5f56c..3ade5d5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -60,11 +60,6 @@
>  #define mmMMHUB_VM_INVALIDATE_ENG16_ACK  0x0705
>  #define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
>  
> -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32  0x0727
> -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
> -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32  0x0728
> -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
> -
>  #define V9_PIPE_PER_MEC  (4)
>  #define V9_QUEUES_PER_PIPE_MEC   (8)
>  
> @@ -772,18 +767,6 @@ static void write_vmid_invalidate_request(struct kgd_dev 
> *kgd, uint8_t vmid)
>* TODO 2: support range-based invalidation, requires kfg2kgd
>* interface change
>*/
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
> - 0x);
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
> - 0x001f);
> -
> - WREG32(SOC15_REG_OFFSET(MMHUB, 0,
> - mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
> - 0x);
> - WREG32(SOC15_REG_OFFSET(MMHUB, 0,
> - mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
> - 0x001f);
> -
>   WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
>  
>   WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] mm: convert totalram_pages, totalhigh_pages and managed_pages to atomic.

2018-10-23 Thread Arun Sudhilal
On Mon, Oct 22, 2018 at 11:41 PM Michal Hocko  wrote:
>
> On Mon 22-10-18 22:53:22, Arun KS wrote:
> > Remove managed_page_count_lock spinlock and instead use atomic
> > variables.
>

Hello Michal,
> I assume this has been auto-generated. If yes, it would be better to
> mention the script so that people can review it and regenerate for
> comparision. Such a large change is hard to review manually.

Changes were made partially with script.  For totalram_pages and
totalhigh_pages,

find dir -type f -exec sed -i
's/totalram_pages/atomic_long_read(\_pages)/g' {} \;
find dir -type f -exec sed -i
's/totalhigh_pages/atomic_long_read(\_pages)/g' {} \;

For managed_pages it was mostly manual edits after using,
find mm/ -type f -exec sed -i
's/zone->managed_pages/atomic_long_read(\>managed_pages)/g' {}
\;

Regards,
Arun

> --
> Michal Hocko
> SUSE Labs
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/5] drm/ttm: use a static ttm_mem_global instance

2018-10-23 Thread Thomas Zimmermann
Hi Christian

Am 19.10.18 um 18:41 schrieb Christian König:
> As the name says we only need one global instance of ttm_mem_global.
> 
> Drop all the driver initialization and just use a single exported
> instance which is initialized during BO global initialization.
> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 44 
> -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  1 -
>  drivers/gpu/drm/ast/ast_drv.h   |  1 -
>  drivers/gpu/drm/ast/ast_ttm.c   | 32 ++
>  drivers/gpu/drm/bochs/bochs.h   |  1 -
>  drivers/gpu/drm/bochs/bochs_mm.c| 30 ++---
>  drivers/gpu/drm/cirrus/cirrus_drv.h |  1 -
>  drivers/gpu/drm/cirrus/cirrus_ttm.c | 32 ++
>  drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h |  1 -
>  drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c | 31 +++--
>  drivers/gpu/drm/mgag200/mgag200_drv.h   |  1 -
>  drivers/gpu/drm/mgag200/mgag200_ttm.c   | 32 ++
>  drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 -
>  drivers/gpu/drm/nouveau/nouveau_ttm.c   | 34 ++-
>  drivers/gpu/drm/qxl/qxl_drv.h   |  1 -
>  drivers/gpu/drm/qxl/qxl_ttm.c   | 28 
>  drivers/gpu/drm/radeon/radeon.h |  1 -
>  drivers/gpu/drm/radeon/radeon_ttm.c | 26 ---
>  drivers/gpu/drm/ttm/ttm_bo.c| 10 --
>  drivers/gpu/drm/ttm/ttm_memory.c|  5 +--
>  drivers/gpu/drm/virtio/virtgpu_drv.h|  1 -
>  drivers/gpu/drm/virtio/virtgpu_ttm.c| 27 ---
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  4 +--
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.h |  3 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c| 27 ---
>  drivers/staging/vboxvideo/vbox_drv.h|  1 -
>  drivers/staging/vboxvideo/vbox_ttm.c| 24 --
>  include/drm/ttm/ttm_bo_driver.h |  8 ++---
>  include/drm/ttm/ttm_memory.h|  4 +--
>  29 files changed, 32 insertions(+), 380 deletions(-)

Great that you removed all the global TTM state from all the drivers.
This removes a lot of duplication and simplifies driver development a bit.


> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 9edece6510d3..3006050b1720 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1526,18 +1526,22 @@ void ttm_bo_global_release(struct ttm_bo_global *glob)
>  {
>   kobject_del(>kobj);
>   kobject_put(>kobj);
> + ttm_mem_global_release(_mem_glob);
>  }
>  EXPORT_SYMBOL(ttm_bo_global_release);
>  
> -int ttm_bo_global_init(struct ttm_bo_global *glob,
> -struct ttm_mem_global *mem_glob)
> +int ttm_bo_global_init(struct ttm_bo_global *glob)
>  {
>   int ret;
>   unsigned i;
>  
> + ret = ttm_mem_global_init(_mem_glob);
> + if (ret)
> + return ret;
> +

What I really dislike about this patch set is that it mixes state and
implementation into that same functions. The original code had a fairly
good separation of both. Now the mechanisms and policies are located in
the same places.[^1]

This looks like a simplification, but from my experience, such code is a
setup for long-term maintenance problems. For example, I can imagine
that someone at some point wants multiple global buffers (e.g., on a
NUMA-like architecture).

I understand that I'm new here, have no say, and probably don't get the
big picture, but from my point of view, this is not a forward-thinking
change.

Best regards
Thomas

[^1] More philosophically speaking, program state can be global, but
data structures can only be share-able. ttm_mem_global and ttm_bo_global
should be renamed to something like ttm_shared_mem, rsp. ttm_shared_bo.


>   mutex_init(>device_list_mutex);
>   spin_lock_init(>lru_lock);
> - glob->mem_glob = mem_glob;
> + glob->mem_glob = _mem_glob;
>   glob->mem_glob->bo_glob = glob;
>   glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_memory.c 
> b/drivers/gpu/drm/ttm/ttm_memory.c
> index 450387c92b63..7704e17c402f 100644
> --- a/drivers/gpu/drm/ttm/ttm_memory.c
> +++ b/drivers/gpu/drm/ttm/ttm_memory.c
> @@ -41,6 +41,9 @@
>  
>  #define TTM_MEMORY_ALLOC_RETRIES 4
>  
> +struct ttm_mem_global ttm_mem_glob;
> +EXPORT_SYMBOL(ttm_mem_glob);
> +
>  struct ttm_mem_zone {
>   struct kobject kobj;
>   struct ttm_mem_global *glob;
> @@ -464,7 +467,6 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
>   ttm_mem_global_release(glob);
>   return ret;
>  }
> -EXPORT_SYMBOL(ttm_mem_global_init);
>  
>  void ttm_mem_global_release(struct ttm_mem_global *glob)
>  {
> @@ -486,7 +488,6 @@ void