RE: [PATCH 3/5] drm/ttm: remove the backing store if no placement is given

2018-03-26 Thread He, Roger

Acked-by: Roger He 

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Sunday, March 25, 2018 6:58 PM
To: linaro-mm-...@lists.linaro.org; linux-me...@vger.kernel.org; 
dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; 
sumit.sem...@linaro.org
Subject: [PATCH 3/5] drm/ttm: remove the backing store if no placement is given

Pipeline removal of the BOs backing store when no placement is given during 
validation.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
98e06f8bf23b..17e821f01d0a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1078,6 +1078,18 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
uint32_t new_flags;
 
reservation_object_assert_held(bo->resv);
+
+   /*
+* Remove the backing store if no placement is given.
+*/
+   if (!placement->num_placement && !placement->num_busy_placement) {
+   ret = ttm_bo_pipeline_gutting(bo);
+   if (ret)
+   return ret;
+
+   return ttm_tt_create(bo, false);
+   }
+
/*
 * Check whether we need to move buffer.
 */
--
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/5] drm/ttm: keep a reference to transfer pipelined BOs

2018-03-26 Thread He, Roger

Reviewed-by: Roger He 

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Friday, March 16, 2018 9:21 PM
To: linaro-mm-...@lists.linaro.org; linux-me...@vger.kernel.org; 
dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Subject: [PATCH 2/5] drm/ttm: keep a reference to transfer pipelined BOs

Make sure the transferred BO is never destroyed before the transfer BO.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 50 +++
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 1f730b3f18e5..1ee20558ee31 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -39,6 +39,11 @@
 #include 
 #include 
 
+struct ttm_transfer_obj {
+   struct ttm_buffer_object base;
+   struct ttm_buffer_object *bo;
+};
+
 void ttm_bo_free_old_node(struct ttm_buffer_object *bo)  {
ttm_bo_mem_put(bo, >mem);
@@ -435,7 +440,11 @@ EXPORT_SYMBOL(ttm_bo_move_memcpy);
 
 static void ttm_transfered_destroy(struct ttm_buffer_object *bo)  {
-   kfree(bo);
+   struct ttm_transfer_obj *fbo;
+
+   fbo = container_of(bo, struct ttm_transfer_obj, base);
+   ttm_bo_unref(>bo);
+   kfree(fbo);
 }
 
 /**
@@ -456,14 +465,15 @@ static void ttm_transfered_destroy(struct 
ttm_buffer_object *bo)  static int ttm_buffer_object_transfer(struct 
ttm_buffer_object *bo,
  struct ttm_buffer_object **new_obj)  {
-   struct ttm_buffer_object *fbo;
+   struct ttm_transfer_obj *fbo;
int ret;
 
fbo = kmalloc(sizeof(*fbo), GFP_KERNEL);
if (!fbo)
return -ENOMEM;
 
-   *fbo = *bo;
+   fbo->base = *bo;
+   fbo->bo = ttm_bo_reference(bo);
 
/**
 * Fix up members that we shouldn't copy directly:
@@ -471,25 +481,25 @@ static int ttm_buffer_object_transfer(struct 
ttm_buffer_object *bo,
 */
 
atomic_inc(>bdev->glob->bo_count);
-   INIT_LIST_HEAD(>ddestroy);
-   INIT_LIST_HEAD(>lru);
-   INIT_LIST_HEAD(>swap);
-   INIT_LIST_HEAD(>io_reserve_lru);
-   mutex_init(>wu_mutex);
-   fbo->moving = NULL;
-   drm_vma_node_reset(>vma_node);
-   atomic_set(>cpu_writers, 0);
-
-   kref_init(>list_kref);
-   kref_init(>kref);
-   fbo->destroy = _transfered_destroy;
-   fbo->acc_size = 0;
-   fbo->resv = >ttm_resv;
-   reservation_object_init(fbo->resv);
-   ret = reservation_object_trylock(fbo->resv);
+   INIT_LIST_HEAD(>base.ddestroy);
+   INIT_LIST_HEAD(>base.lru);
+   INIT_LIST_HEAD(>base.swap);
+   INIT_LIST_HEAD(>base.io_reserve_lru);
+   mutex_init(>base.wu_mutex);
+   fbo->base.moving = NULL;
+   drm_vma_node_reset(>base.vma_node);
+   atomic_set(>base.cpu_writers, 0);
+
+   kref_init(>base.list_kref);
+   kref_init(>base.kref);
+   fbo->base.destroy = _transfered_destroy;
+   fbo->base.acc_size = 0;
+   fbo->base.resv = >base.ttm_resv;
+   reservation_object_init(fbo->base.resv);
+   ret = reservation_object_trylock(fbo->base.resv);
WARN_ON(!ret);
 
-   *new_obj = fbo;
+   *new_obj = >base;
return 0;
 }
 
--
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/6] drm/ttm: add ttm_bo_pipeline_gutting

2018-03-07 Thread He, Roger
Patch 1,4,5: Acked-by: Roger He 
Patch 2,3,6: Reviewed-by: Roger He 

-Original Message-
From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Tuesday, March 06, 2018 10:44 PM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Subject: [PATCH 1/6] drm/ttm: add ttm_bo_pipeline_gutting

Allows us to gut a BO of it's backing store when the driver says that it isn't 
needed any more.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c  | 15 ---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 24 
 include/drm/ttm/ttm_bo_driver.h   |  9 +
 3 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
ad142a92eb80..98e06f8bf23b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -622,14 +622,23 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
 
reservation_object_assert_held(bo->resv);
 
+   placement.num_placement = 0;
+   placement.num_busy_placement = 0;
+   bdev->driver->evict_flags(bo, );
+
+   if (!placement.num_placement && !placement.num_busy_placement) {
+   ret = ttm_bo_pipeline_gutting(bo);
+   if (ret)
+   return ret;
+
+   return ttm_tt_create(bo, false);
+   }
+
evict_mem = bo->mem;
evict_mem.mm_node = NULL;
evict_mem.bus.io_reserved_vm = false;
evict_mem.bus.io_reserved_count = 0;
 
-   placement.num_placement = 0;
-   placement.num_busy_placement = 0;
-   bdev->driver->evict_flags(bo, );
ret = ttm_bo_mem_space(bo, , _mem, ctx);
if (ret) {
if (ret != -ERESTARTSYS) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 6d6a3f46143b..1f730b3f18e5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -801,3 +801,27 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
return 0;
 }
 EXPORT_SYMBOL(ttm_bo_pipeline_move);
+
+int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo) {
+   struct ttm_buffer_object *ghost;
+   int ret;
+
+   ret = ttm_buffer_object_transfer(bo, );
+   if (ret)
+   return ret;
+
+   ret = reservation_object_copy_fences(ghost->resv, bo->resv);
+   /* Last resort, wait for the BO to be idle when we are OOM */
+   if (ret)
+   ttm_bo_wait(bo, false, false);
+
+   memset(>mem, 0, sizeof(bo->mem));
+   bo->mem.mem_type = TTM_PL_SYSTEM;
+   bo->ttm = NULL;
+
+   ttm_bo_unreserve(ghost);
+   ttm_bo_unref();
+
+   return 0;
+}
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h 
index f8e2515b401f..39cd6b086d3a 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -849,6 +849,15 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
 struct dma_fence *fence, bool evict,
 struct ttm_mem_reg *new_mem);
 
+/**
+ * ttm_bo_pipeline_gutting.
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ *
+ * Pipelined gutting a BO of it's backing store.
+ */
+int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo);
+
 /**
  * ttm_io_prot
  *
--
2.14.1

___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.

2018-02-23 Thread He, Roger
I missed the Per-VM-BO share the reservation object with root bo. So context is 
not NULL here.
So,  this patch is:

Reviewed-by: Roger He <hongbo...@amd.com>

Thanks
Roger(Hongbo.He)
-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
Sent: Friday, February 23, 2018 8:06 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org; linux-ker...@vger.kernel.org
Subject: Re: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and 
swapout.

Am 23.02.2018 um 10:46 schrieb He, Roger:
>
> -Original Message-
> From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On 
> Behalf Of Christian König
> Sent: Tuesday, February 20, 2018 8:58 PM
> To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
> linux-ker...@vger.kernel.org
> Subject: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and 
> swapout.
>
> This solves the problem that when we swapout a BO from a domain we sometimes 
> couldn't make room for it because holding the lock blocks all other BOs with 
> this reservation object.
>
> Signed-off-by: Christian König <christian.koe...@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 33 -
>   1 file changed, 16 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c 
> b/drivers/gpu/drm/ttm/ttm_bo.c index d90b1cf10b27..3a44c2ee4155 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -713,31 +713,30 @@ bool ttm_bo_eviction_valuable(struct 
> ttm_buffer_object *bo,  EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>   
>   /**
> - * Check the target bo is allowable to be evicted or swapout, including 
> cases:
> - *
> - * a. if share same reservation object with ctx->resv, have 
> assumption
> - * reservation objects should already be locked, so not lock again 
> and
> - * return true directly when either the opreation 
> allow_reserved_eviction
> - * or the target bo already is in delayed free list;
> - *
> - * b. Otherwise, trylock it.
> + * Check if the target bo is allowed to be evicted or swapedout.
>*/
>   static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> - struct ttm_operation_ctx *ctx, bool *locked)
> +struct ttm_operation_ctx *ctx,
> +bool *locked)
>   {
> - bool ret = false;
> + /* First check if we can lock it */
> + *locked = reservation_object_trylock(bo->resv);
> + if (*locked)
> + return true;
>   
> - *locked = false;
> + /* Check if it's locked because it is part of the current operation 
> +*/
>   if (bo->resv == ctx->resv) {
>   reservation_object_assert_held(bo->resv);
> - if (ctx->allow_reserved_eviction || !list_empty(>ddestroy))
> - ret = true;
> - } else {
> - *locked = reservation_object_trylock(bo->resv);
> - ret = *locked;
> + return ctx->allow_reserved_eviction ||
> + !list_empty(>ddestroy);
>   }
>   
> - return ret;
> + /* Check if it's locked because it was already evicted */
> + if (ww_mutex_is_owned_by(>resv->lock, NULL))
> + return true;
>
> For the special case: when command submission with Per-VM-BO enabled, 
> All BOs  a/b/c are always valid BO. After the validation of BOs a and 
> b, when validation of BO c, is it possible to return true and then evict BO a 
> and b by mistake ?
> Because a/b/c share same task_struct.

No, that's why I check the context as well. BOs explicitly reserved 
have a non NULL context while BOs trylocked for swapout have a NULL 
context.

BOs have a non NULL context only when command submission and reserved 
by ttm_eu_reserve_buffers.
But for Per-VM-BO a/b/c they always are not in BO list, so they will be 
not reserved and have always NULL context.
So above case also can happen. Anything missing here?  

>
> + /* Some other thread is using it, don't touch it */
> + return false;
>   }
>   
>   static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> --
> 2.14.1
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.

2018-02-23 Thread He, Roger


-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Friday, February 23, 2018 8:06 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org; linux-ker...@vger.kernel.org
Subject: Re: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and 
swapout.

Am 23.02.2018 um 10:46 schrieb He, Roger:
>
> -Original Message-
> From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On 
> Behalf Of Christian König
> Sent: Tuesday, February 20, 2018 8:58 PM
> To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
> linux-ker...@vger.kernel.org
> Subject: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and 
> swapout.
>
> This solves the problem that when we swapout a BO from a domain we sometimes 
> couldn't make room for it because holding the lock blocks all other BOs with 
> this reservation object.
>
> Signed-off-by: Christian König <christian.koe...@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 33 -
>   1 file changed, 16 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c 
> b/drivers/gpu/drm/ttm/ttm_bo.c index d90b1cf10b27..3a44c2ee4155 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -713,31 +713,30 @@ bool ttm_bo_eviction_valuable(struct 
> ttm_buffer_object *bo,  EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>   
>   /**
> - * Check the target bo is allowable to be evicted or swapout, including 
> cases:
> - *
> - * a. if share same reservation object with ctx->resv, have 
> assumption
> - * reservation objects should already be locked, so not lock again 
> and
> - * return true directly when either the opreation 
> allow_reserved_eviction
> - * or the target bo already is in delayed free list;
> - *
> - * b. Otherwise, trylock it.
> + * Check if the target bo is allowed to be evicted or swapedout.
>*/
>   static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> - struct ttm_operation_ctx *ctx, bool *locked)
> +struct ttm_operation_ctx *ctx,
> +bool *locked)
>   {
> - bool ret = false;
> + /* First check if we can lock it */
> + *locked = reservation_object_trylock(bo->resv);
> + if (*locked)
> + return true;
>   
> - *locked = false;
> + /* Check if it's locked because it is part of the current operation 
> +*/
>   if (bo->resv == ctx->resv) {
>   reservation_object_assert_held(bo->resv);
> - if (ctx->allow_reserved_eviction || !list_empty(>ddestroy))
> - ret = true;
> - } else {
> - *locked = reservation_object_trylock(bo->resv);
> - ret = *locked;
> + return ctx->allow_reserved_eviction ||
> + !list_empty(>ddestroy);
>   }
>   
> - return ret;
> + /* Check if it's locked because it was already evicted */
> + if (ww_mutex_is_owned_by(>resv->lock, NULL))
> + return true;
>
> For the special case: when command submission with Per-VM-BO enabled, 
> All BOs  a/b/c are always valid BO. After the validation of BOs a and 
> b, when validation of BO c, is it possible to return true and then evict BO a 
> and b by mistake ?
> Because a/b/c share same task_struct.

No, that's why I check the context as well. BOs explicitly reserved 
have a non NULL context while BOs trylocked for swapout have a NULL context.

When BOs have a non NULL context only when command submission and reserved by 
ttm_eu_reserve_buffers.
But for Per-VM-BO a/b/c they always are not in BO list, so they will be not 
reserved and have always NULL context.
So above case also can happen. Anything missing here?  

Thanks
Roger(Hongbo.He)
>
> + /* Some other thread is using it, don't touch it */
> + return false;
>   }
>   
>   static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> --
> 2.14.1
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/4] drm/amdgpu: use new ww_mutex_is_owned_by function

2018-02-23 Thread He, Roger

Reviewed-by: Roger He 

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Tuesday, February 20, 2018 8:58 PM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
linux-ker...@vger.kernel.org
Subject: [PATCH 2/4] drm/amdgpu: use new ww_mutex_is_owned_by function

Instead of accessing ww_mutex internals directly use the provided function to 
check if the ww_mutex was indeed locked by the current command submission.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index eaa3cb0c3ad1..6db26a3144dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1594,7 +1594,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser 
*parser,
*map = mapping;
 
/* Double check that the BO is reserved by this CS */
-   if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != >ticket)
+   if (!ww_mutex_is_owned_by(&(*bo)->tbo.resv->lock, >ticket))
return -EINVAL;
 
if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
--
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and swapout.

2018-02-23 Thread He, Roger


-Original Message-
From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Tuesday, February 20, 2018 8:58 PM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
linux-ker...@vger.kernel.org
Subject: [PATCH 3/4] drm/ttm: handle already locked BOs during eviction and 
swapout.

This solves the problem that when we swapout a BO from a domain we sometimes 
couldn't make room for it because holding the lock blocks all other BOs with 
this reservation object.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 33 -
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
d90b1cf10b27..3a44c2ee4155 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -713,31 +713,30 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object 
*bo,  EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
 /**
- * Check the target bo is allowable to be evicted or swapout, including cases:
- *
- * a. if share same reservation object with ctx->resv, have assumption
- * reservation objects should already be locked, so not lock again and
- * return true directly when either the opreation allow_reserved_eviction
- * or the target bo already is in delayed free list;
- *
- * b. Otherwise, trylock it.
+ * Check if the target bo is allowed to be evicted or swapedout.
  */
 static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-   struct ttm_operation_ctx *ctx, bool *locked)
+  struct ttm_operation_ctx *ctx,
+  bool *locked)
 {
-   bool ret = false;
+   /* First check if we can lock it */
+   *locked = reservation_object_trylock(bo->resv);
+   if (*locked)
+   return true;
 
-   *locked = false;
+   /* Check if it's locked because it is part of the current operation */
if (bo->resv == ctx->resv) {
reservation_object_assert_held(bo->resv);
-   if (ctx->allow_reserved_eviction || !list_empty(>ddestroy))
-   ret = true;
-   } else {
-   *locked = reservation_object_trylock(bo->resv);
-   ret = *locked;
+   return ctx->allow_reserved_eviction ||
+   !list_empty(>ddestroy);
}
 
-   return ret;
+   /* Check if it's locked because it was already evicted */
+   if (ww_mutex_is_owned_by(>resv->lock, NULL))
+   return true;

For the special case: when command submission with Per-VM-BO enabled,
All BOs  a/b/c are always valid BO. After the validation of BOs a and b,  
when validation of BO c, is it possible to return true and then evict BO a and 
b by mistake ?
Because a/b/c share same task_struct.

Thanks
Roger(Hongbo.He)

+   /* Some other thread is using it, don't touch it */
+   return false;
 }
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
--
2.14.1

___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/4] drm/ttm: add input parameter force_alloc for ttm_bo_evict_mm

2018-02-08 Thread He, Roger
I can't think of an use case when we don't want this to succeed.

That is true. seems I can simplify more here.

Thanks
Roger(Hongbo.He)
-Original Message-
From: Koenig, Christian 
Sent: Thursday, February 08, 2018 8:58 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 3/4] drm/ttm: add input parameter force_alloc for 
ttm_bo_evict_mm

Am 08.02.2018 um 10:06 schrieb Roger He:
> if true, allocate TTM pages regardless of zone global memory account 
> limit. For suspend, We should avoid TTM memory allocate failure then 
> result in suspend failure.

Why the extra parameter for amdgpu_bo_evict_vram ?

I can't think of an use case when we don't want this to succeed.

Christian.

>
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  2 +-
>   drivers/gpu/drm/nouveau/nouveau_drm.c   |  2 +-
>   drivers/gpu/drm/qxl/qxl_object.c|  4 ++--
>   drivers/gpu/drm/radeon/radeon_device.c  |  6 +++---
>   drivers/gpu/drm/radeon/radeon_object.c  |  5 +++--
>   drivers/gpu/drm/radeon/radeon_object.h  |  3 ++-
>   drivers/gpu/drm/ttm/ttm_bo.c| 16 ++--
>   drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  6 +++---
>   include/drm/ttm/ttm_bo_api.h|  5 -
>   12 files changed, 34 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index ee76b46..59ee12c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -763,7 +763,7 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, 
> void *data)
>   struct drm_device *dev = node->minor->dev;
>   struct amdgpu_device *adev = dev->dev_private;
>   
> - seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
> + seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev, true));
>   return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e3fa3d7..3c5f9ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2168,7 +2168,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
> suspend, bool fbcon)
>   }
>   }
>   /* evict vram memory */
> - amdgpu_bo_evict_vram(adev);
> + amdgpu_bo_evict_vram(adev, true);
>   
>   amdgpu_fence_driver_suspend(adev);
>   
> @@ -2178,7 +2178,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
> suspend, bool fbcon)
>* This second call to evict vram is to evict the gart page table
>* using the CPU.
>*/
> - amdgpu_bo_evict_vram(adev);
> + amdgpu_bo_evict_vram(adev, true);
>   
>   pci_save_state(dev->pdev);
>   if (suspend) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 0338ef6..db813f9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -803,14 +803,14 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
>   return r;
>   }
>   
> -int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
> +int amdgpu_bo_evict_vram(struct amdgpu_device *adev, bool 
> +force_alloc)
>   {
>   /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
>   if (0 && (adev->flags & AMD_IS_APU)) {
>   /* Useless to evict on IGP chips */
>   return 0;
>   }
> - return ttm_bo_evict_mm(>mman.bdev, TTM_PL_VRAM);
> + return ttm_bo_evict_mm(>mman.bdev, TTM_PL_VRAM, force_alloc);
>   }
>   
>   static const char *amdgpu_vram_names[] = { diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index c2b02f5..6724cdc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -227,7 +227,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
> domain,
>u64 min_offset, u64 max_offset,
>u64 *gpu_addr);
>   int amdgpu_bo_unpin(struct amdgpu_bo *bo); -int 
> amdgpu_bo_evict_vram(struct amdgpu_device *adev);
> +int amdgpu_bo_evict_vram(struct amdgpu_device *adev, bool 
> +force_alloc);
>   int amdgpu_bo_init(struct amdgpu_device *adev);
>   vo

RE: [PATCH 0/5] prevent OOM triggered by TTM

2018-02-07 Thread He, Roger
Sure, will make it turn off as default and make the limit configurable.

Thanks
Roger(Hongbo.He)
-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Wednesday, February 07, 2018 4:41 PM
To: He, Roger <hongbo...@amd.com>; Thomas Hellstrom <tho...@shipmail.org>; 
amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Cc: Koenig, Christian <christian.koe...@amd.com>
Subject: Re: [PATCH 0/5] prevent OOM triggered by TTM

> AMD prefers return value of no memory rather than OOM for now.
Yeah, but Thomas is right that the default for this feature should be that it 
is turned off.

That's why I said that we should make the limit configurable.

Regards,
Christian.

Am 07.02.2018 um 09:25 schrieb He, Roger:
>   Why should TTM be different in that aspect? It would be good to know 
> your reasoning WRT this?
>
> Now, in TTM struct ttm_bo_device it already has member no_retry to indicate 
> your option.
> If you prefer no OOM triggered by TTM, set it as true. The default is false 
> to keep original behavior.
> AMD prefers return value of no memory rather than OOM for now.
>
>
>
>   One thing I looked at at one point was to have TTM do the swapping 
> itself instead of handing it off to the shmem system. That way we 
> could pre-allocate swap entries for all swappable (BO) memory, making 
> sure that we wouldn't run out of swap space when,
>
> I prefer current mechanism of swap out. At the beginning the swapped pages 
> stay in system memory by shmem until OS move to status with high memory 
> pressure, that has an obvious advantage. For example, if the BO is swapped 
> out into shmem, but not really be flushed into swap disk. When validate it 
> and swap in it at this moment, the overhead is small compared to swap in from 
> disk. In addition, No need swap space reservation for TTM pages when 
> allocation since swap disk is shared not only for TTM exclusive.
> So again we provide a flag no_retry in struct ttm_bo_device to let driver set 
> according to its request.
>
>
> Thanks
> Roger(Hongbo.He)
>
> -Original Message-
> From: Thomas Hellstrom [mailto:tho...@shipmail.org]
> Sent: Wednesday, February 07, 2018 2:43 PM
> To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
> dri-de...@lists.freedesktop.org
> Cc: Koenig, Christian <christian.koe...@amd.com>
> Subject: Re: [PATCH 0/5] prevent OOM triggered by TTM
>
> Hi, Roger,
>
> On 02/06/2018 10:04 AM, Roger He wrote:
>> currently ttm code has no any allocation limit. So it allows pages 
>> allocation unlimited until OOM. Because if swap space is full of 
>> swapped pages and then system memory will be filled up with ttm pages.
>> and then any memory allocation request will trigger OOM.
>>
> I'm a bit curious, isn't this the way things are supposed to work on a linux 
> system?
> If all memory resources are used up, the OOM killer will kill the most memory 
> hungry (perhaps rogue) process rather than processes being nice and try to 
> find out themselves whether allocations will succeed?
> Why should TTM be different in that aspect? It would be good to know your 
> reasoning WRT this?
>
> Admittedly, graphics process OOM memory accounting doesn't work very well, 
> due to not all BOs not being CPU mapped, but it looks like there is recent 
> work towards fixing this?
>
> One thing I looked at at one point was to have TTM do the swapping itself 
> instead of handing it off to the shmem system. That way we could pre-allocate 
> swap entries for all swappable (BO) memory, making sure that we wouldn't run 
> out of swap space when, for example, hibernating and that would also limit 
> the pinned non-swappable memory (from TTM driver kernel allocations for 
> example) to half the system memory resources.
>
> Thanks,
> Thomas
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 0/5] prevent OOM triggered by TTM

2018-02-06 Thread He, Roger
Move the new call out of ttm_mem_global_reserve() and into 
ttm_page_alloc.c or ttm_page_alloc_dma.c (but keep it in ttm_memory.c).  
ttm_mem_global_reserve() is called for each page allocated and 
si_mem_available() is a bit to heavy for that.

Good idea! Agree with you completely, because initially I also concern that but 
no better way at that time.
Going to improve the patches. Thanks!

-Original Message-
From: Koenig, Christian 
Sent: Tuesday, February 06, 2018 5:52 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Cc: tho...@shipmail.org
Subject: Re: [PATCH 0/5] prevent OOM triggered by TTM

Nice work, but a few comments.

First of all you need to reorder the patches. Adding the exceptions to the 
restrictions should come first, then the restriction itself. 
Otherwise we might break a setup in between the patches and that is bad for 
bisecting.

Then make all values configurable, e.g. take a closer look at ttm_memory.c. 
Just add attributes directly under the memory_accounting directory (see 
ttm_mem_global_init).

Additional to that you can't put device specific information (the no_retry 
flag) into ttm_mem_global, that is driver unspecific and won't work like this.

Move the new call out of ttm_mem_global_reserve() and into ttm_page_alloc.c or 
ttm_page_alloc_dma.c (but keep it in ttm_memory.c). 
ttm_mem_global_reserve() is called for each page allocated and
si_mem_available() is a bit to heavy for that.

Maybe name TTM_OPT_FLAG_ALLOW_ALLOC_ANYWAY something like _FORCE_ALLOCATION or 
_ALLOW_OOM.

And please also try if a criteria like (si_mem_available() +
get_nr_swap_pages()) < limit works as well. This way we would have only a 
single new limit.

Regards,
Christian.

Am 06.02.2018 um 10:04 schrieb Roger He:
> currently ttm code has no any allocation limit. So it allows pages 
> allocation unlimited until OOM. Because if swap space is full of 
> swapped pages and then system memory will be filled up with ttm pages. 
> and then any memory allocation request will trigger OOM.
>
>
> the following patches is for prevent OOM triggered by TTM.
> the basic idea is when allocating TTM pages, check the free swap space 
> first. if it is less than the fixed limit, reject the allocation request.
> but there are two exceptions which should allow it regardless of zone 
> memory account limit.
> a. page fault
> for ttm_mem_global_reserve if serving for page fault routine,
> because page fault routing already grabbed system memory so the
> allowance of this exception is harmless. Otherwise, it will trigger
>  OOM killer.
> b. suspend
> anyway, we should allow suspend success always.
>
>
> at last, if bdev.no_retry is false (by defaut), keep the original 
> behavior no any change.
>
> Roger He (5):
>drm/ttm: check if the free swap space is under limit 256MB
>drm/ttm: keep original behavior except with flag no_retry
>drm/ttm: use bit flag to replace allow_reserved_eviction in
>  ttm_operation_ctx
>drm/ttm: add bit flag TTM_OPT_FLAG_ALLOW_ALLOC_ANYWAY
>drm/ttm: add input parameter allow_allo_anyway for ttm_bo_evict_mm
>
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  4 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  4 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  | 10 +++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  8 +++--
>   drivers/gpu/drm/nouveau/nouveau_drm.c   |  2 +-
>   drivers/gpu/drm/qxl/qxl_object.c|  4 +--
>   drivers/gpu/drm/radeon/radeon_device.c  |  6 ++--
>   drivers/gpu/drm/radeon/radeon_object.c  |  6 ++--
>   drivers/gpu/drm/radeon/radeon_object.h  |  3 +-
>   drivers/gpu/drm/ttm/ttm_bo.c| 19 +++
>   drivers/gpu/drm/ttm/ttm_bo_vm.c |  6 ++--
>   drivers/gpu/drm/ttm/ttm_memory.c| 51 
> ++---
>   drivers/gpu/drm/ttm/ttm_page_alloc_dma.c|  1 -
>   drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  6 ++--
>   include/drm/ttm/ttm_bo_api.h| 14 ++--
>   include/drm/ttm/ttm_memory.h|  6 
>   18 files changed, 111 insertions(+), 43 deletions(-)
>

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 5/5] drm/amdgpu: replace iova debugfs file with iomem

2018-02-04 Thread He, Roger
Patch1 & 2 & 4,   Reviewed-by: Roger He 
Patch 5:  Acked-by: Roger He 

-Original Message-
From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Saturday, February 03, 2018 3:10 AM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Subject: [PATCH 5/5] drm/amdgpu: replace iova debugfs file with iomem

This allows access to pages allocated through the driver with optional IOMMU 
mapping.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 57 -
 1 file changed, 35 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 648c449aaa79..795ceaeb82d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1929,38 +1929,51 @@ static const struct file_operations amdgpu_ttm_gtt_fops 
= {
 
 #endif
 
-static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
-  size_t size, loff_t *pos)
+static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
+size_t size, loff_t *pos)
 {
struct amdgpu_device *adev = file_inode(f)->i_private;
-   int r;
-   uint64_t phys;
struct iommu_domain *dom;
+   ssize_t result = 0;
+   int r;
 
-   // always return 8 bytes
-   if (size != 8)
-   return -EINVAL;
+   dom = iommu_get_domain_for_dev(adev->dev);
 
-   // only accept page addresses
-   if (*pos & 0xFFF)
-   return -EINVAL;
+   while (size) {
+   phys_addr_t addr = *pos & PAGE_MASK;
+   loff_t off = *pos & ~PAGE_MASK;
+   size_t bytes = PAGE_SIZE - off;
+   unsigned long pfn;
+   struct page *p;
+   void *ptr;
 
-   dom = iommu_get_domain_for_dev(adev->dev);
-   if (dom)
-   phys = iommu_iova_to_phys(dom, *pos);
-   else
-   phys = *pos;
+   addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
 
-   r = copy_to_user(buf, , 8);
-   if (r)
-   return -EFAULT;
+   pfn = addr >> PAGE_SHIFT;
+   if (!pfn_valid(pfn))
+   return -EPERM;
+
+   p = pfn_to_page(pfn);
+   if (p->mapping != adev->mman.bdev.dev_mapping)
+   return -EPERM;
+
+   ptr = kmap(p);
+   r = copy_to_user(buf, ptr, bytes);
+   kunmap(p);
+   if (r)
+   return -EFAULT;
 
-   return 8;
+   size -= bytes;
+   *pos += bytes;
+   result += bytes;
+   }
+
+   return result;
 }
 
-static const struct file_operations amdgpu_ttm_iova_fops = {
+static const struct file_operations amdgpu_ttm_iomem_fops = {
.owner = THIS_MODULE,
-   .read = amdgpu_iova_to_phys_read,
+   .read = amdgpu_iomem_read,
.llseek = default_llseek
 };
 
@@ -1973,7 +1986,7 @@ static const struct {  #ifdef 
CONFIG_DRM_AMDGPU_GART_DEBUGFS
{ "amdgpu_gtt", _ttm_gtt_fops, TTM_PL_TT },  #endif
-   { "amdgpu_iova", _ttm_iova_fops, TTM_PL_SYSTEM },
+   { "amdgpu_iomem", _ttm_iomem_fops, TTM_PL_SYSTEM },
 };
 
 #endif
--
2.14.1

___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/5] drm/radeon: remove extra TT unpopulated check

2018-02-04 Thread He, Roger

-Original Message-
From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On Behalf Of 
Christian K?nig
Sent: Saturday, February 03, 2018 3:10 AM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Subject: [PATCH 3/5] drm/radeon: remove extra TT unpopulated check

The subsystem chould check that, not the driver.

Commit log typo, should be "should" rather than " chould".
With that fix,  this patch is Reviewed-by: Roger He 

Thanks
Roger(Hongbo.He)

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/radeon_ttm.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index a0a839bc39bf..42e3ee81a96e 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -728,9 +728,6 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm,
struct radeon_device *rdev;
bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
-   if (ttm->state != tt_unpopulated)
-   return 0;
-
if (gtt && gtt->userptr) {
ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!ttm->sg)
-- 
2.14.1

___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 02/13] drm/amdgpu: drop root shadow sync

2018-01-29 Thread He, Roger

Reviewed-by: Roger He 

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian K?nig
Sent: Saturday, January 27, 2018 4:13 AM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 02/13] drm/amdgpu: drop root shadow sync

Completely pointless, it is the same reservation object as the root PD anyway.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a3b9c3976eb3..5e53b7a2d4d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -956,11 +956,6 @@ int amdgpu_vm_update_directories(struct amdgpu_device 
*adev,
amdgpu_ring_pad_ib(ring, params.ib);
amdgpu_sync_resv(adev, >sync, root->tbo.resv,
 AMDGPU_FENCE_OWNER_VM, false);
-   if (root->shadow)
-   amdgpu_sync_resv(adev, >sync,
-root->shadow->tbo.resv,
-AMDGPU_FENCE_OWNER_VM, false);
-
WARN_ON(params.ib->length_dw > ndw);
r = amdgpu_job_submit(job, ring, >entity,
  AMDGPU_FENCE_OWNER_VM, );
--
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: lock/unlock mismatch in ttm_bo.c

2018-01-23 Thread He, Roger
The patch looks fine to me, please send it to dri mail list.

Thanks
Roger(Hongbo.He)

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Tom 
St Denis
Sent: Wednesday, January 24, 2018 3:25 AM
To: Zhou, David(ChunMing) 
Cc: Koenig, Christian ; amd-gfx mailing list 

Subject: Re: lock/unlock mismatch in ttm_bo.c

On 22/01/18 01:42 AM, Chunming Zhou wrote:
> 
> 
> On 2018年01月20日 02:23, Tom St Denis wrote:
>> On 19/01/18 01:14 PM, Tom St Denis wrote:
>>> Hi all,
>>>
>>> In the function ttm_bo_cleanup_refs() it seems possible to get to 
>>> line 551 without entering the block on 516 which means you'll be 
>>> unlocking a mutex that wasn't locked.
>>>
>>> Now it might be that in the course of the API this pattern cannot be 
>>> expressed but it's not clear from the function alone that that is 
>>> the case.
>>
>>
>> Looking further it seems the behaviour depends on locking in parent 
>> callers.  That's kinda a no-no right?  Shouldn't the lock be 
>> taken/released in the same function ideally?
> Same feelings
> 
> Regards,
> David Zhou

Attached is a patch that addresses this.

I can't see any obvious race in functions that call
ttm_bo_cleanup_refs() between the time they let go of the lock and the time 
it's taken again in the call.

Running it on my system doesn't produce anything notable though the KASAN with 
DRI_PRIME=1 issue is still there (this patch neither causes that nor fixes it).

Tom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/3] drm/amdgpu: move static CSA address to top of address space

2018-01-22 Thread He, Roger


-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian K?nig
Sent: Monday, January 22, 2018 6:44 PM
To: Liu, Monk ; amd-gfx@lists.freedesktop.org
Subject: [PATCH 3/3] drm/amdgpu: move static CSA address to top of address space

There seems to be a design flaw with this since the address of the static CSA 
is never exported anywhere.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 15 +++  
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  6 --
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e7dfb7b44b4b..c7d24af03e3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -55,11 +55,10 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
 
 /*
  * amdgpu_map_static_csa should be called during amdgpu_vm_init
- * it maps virtual address "AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE"
- * to this VM, and each command submission of GFX should use this virtual
- * address within META_DATA init package to support SRIOV gfx preemption.
+ * it maps virtual address "AMDGPU_CSA_VADDR" to this VM, and each 
+ command
+ * submission of GFX should use this virtual address within META_DATA 
+ init
+ * package to support SRIOV gfx preemption.
  */
-
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  struct amdgpu_bo_va **bo_va)
 {
@@ -90,7 +89,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
return -ENOMEM;
}
 
-   r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR,
+   r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, 
+AMDGPU_CSA_VADDR(adev),
AMDGPU_CSA_SIZE);
if (r) {
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", 
r); @@ -99,9 +98,9 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
return r;
}
 
-   r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE,
-AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
-AMDGPU_PTE_EXECUTABLE);
+   r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR(adev), 0,
+AMDGPU_CSA_SIZE, AMDGPU_PTE_READABLE |
+AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE);
 
if (r) {
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 6a83425aa9ed..499362b55e45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -251,8 +251,10 @@ struct amdgpu_virt {
uint32_t gim_feature;
 };
 
-#define AMDGPU_CSA_SIZE(8 * 1024)
-#define AMDGPU_CSA_VADDR   (AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE)
+#define AMDGPU_CSA_SIZE(8 * 1024)
+#define AMDGPU_CSA_VADDR(adev) \
+   (((adev)->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT) + \
+AMDGPU_VA_RESERVED_SIZE)
 
If I  understand your intention correctly, it should be that: 
+   (((adev)->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)  - \
+AMDGPU_VA_RESERVED_SIZE)

Thanks
Roger(Hongbo.He)

 #define amdgpu_sriov_enabled(adev) \
 ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
--
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/ttm: Don't unreserve swapped BOs that were previously reserved

2018-01-18 Thread He, Roger
good catch! 

Thanks
Roger(Hongbo.He)
-Original Message-
From: Kuehling, Felix 
Sent: Friday, January 19, 2018 12:56 AM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; Koenig, 
Christian <christian.koe...@amd.com>; He, Roger <hongbo...@amd.com>
Cc: Kuehling, Felix <felix.kuehl...@amd.com>
Subject: [PATCH 2/2] drm/ttm: Don't unreserve swapped BOs that were previously 
reserved

If ttm_bo_swapout doesn't own the lock, don't release it. Someone else probably 
depends on it still being locked.

Signed-off-by: Felix Kuehling <felix.kuehl...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
62518b6..893003f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1780,8 +1780,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct 
ttm_operation_ctx *ctx)
 * Unreserve without putting on LRU to avoid swapping out an
 * already swapped buffer.
 */
-
-   reservation_object_unlock(bo->resv);
+   if (locked)
+   reservation_object_unlock(bo->resv);
kref_put(>list_kref, ttm_bo_release_list);
return ret;
 }
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [RFC] Per file OOM badness

2018-01-18 Thread He, Roger


-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Michal Hocko
Sent: Friday, January 19, 2018 1:14 AM
To: Grodzovsky, Andrey 
Cc: linux...@kvack.org; amd-gfx@lists.freedesktop.org; 
linux-ker...@vger.kernel.org; dri-de...@lists.freedesktop.org; Koenig, 
Christian 
Subject: Re: [RFC] Per file OOM badness

On Thu 18-01-18 18:00:06, Michal Hocko wrote:
> On Thu 18-01-18 11:47:48, Andrey Grodzovsky wrote:
> > Hi, this series is a revised version of an RFC sent by Christian 
> > König a few years ago. The original RFC can be found at 
> > https://lists.freedesktop.org/archives/dri-devel/2015-September/0897
> > 78.html
> > 
> > This is the same idea and I've just adressed his concern from the 
> > original RFC and switched to a callback into file_ops instead of a new 
> > member in struct file.
> 
> Please add the full description to the cover letter and do not make 
> people hunt links.
> 
> Here is the origin cover letter text
> : I'm currently working on the issue that when device drivers allocate 
> memory on
> : behalf of an application the OOM killer usually doesn't knew about 
> that unless
> : the application also get this memory mapped into their address space.
> : 
> : This is especially annoying for graphics drivers where a lot of the 
> VRAM
> : usually isn't CPU accessible and so doesn't make sense to map into 
> the
> : address space of the process using it.
> : 
> : The problem now is that when an application starts to use a lot of 
> VRAM those
> : buffers objects sooner or later get swapped out to system memory, 
> but when we
> : now run into an out of memory situation the OOM killer obviously 
> doesn't knew
> : anything about that memory and so usually kills the wrong process.

OK, but how do you attribute that memory to a particular OOM killable 
entity? And how do you actually enforce that those resources get freed on the 
oom killer action?

Here I think we need more fine granularity for distinguishing the buffer is 
taking VRAM or system memory.

> : The following set of patches tries to address this problem by 
> introducing a per
> : file OOM badness score, which device drivers can use to give the OOM 
> killer a
> : hint how many resources are bound to a file descriptor so that it 
> can make
> : better decisions which process to kill.

But files are not killable, they can be shared... In other words this doesn't 
help the oom killer to make an educated guess at all.

> : 
> : So question at every one: What do you think about this approach?

I think this is just wrong semantically. Non-reclaimable memory is a 
pain, especially when there is way too much of it. If you can free that memory 
somehow then you can hook into slab shrinker API and react on the memory 
pressure. If you can account such memory to a particular process and 
make sure that the consumption is bound by the process life time then we can 
think of an accounting that oom_badness can consider when selecting a victim.

I think you are misunderstanding here.
Actually for now, the memory in TTM Pools already has mm_shrink which is 
implemented in ttm_pool_mm_shrink_init.
And here the memory we want to make it contribute to OOM badness is not in TTM 
Pools.
Because when TTM buffer allocation success, the memory already is removed from 
TTM Pools.  

Thanks
Roger(Hongbo.He)

--
Michal Hocko
SUSE Labs
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [RFC] Per file OOM badness

2018-01-18 Thread He, Roger
Basically the idea is right to me.

1. But we need smaller granularity to control the contribution to OOM badness.
 Because when the TTM buffer resides in VRAM rather than being evicted to system 
memory, we should not take it into account for badness.
 But I think it is not easy to implement.

2. If the TTM buffer (GTT here) is mapped to user space for CPU access, I am not 
quite sure whether the buffer size is already taken into account by the kernel.
 If yes, the size would end up being counted twice by your patches.

So, I am thinking whether we can count the TTM buffer size into: 
struct mm_rss_stat {
atomic_long_t count[NR_MM_COUNTERS];
};
Which is done by kernel based on CPU VM (page table).

Something like that:
When GTT allocation succeeds:
add_mm_counter(vma->vm_mm, MM_ANONPAGES, buffer_size);

When GTT swapped out:
dec_mm_counter from MM_ANONPAGES first, then 
add_mm_counter(vma->vm_mm, MM_SWAPENTS, buffer_size);  // or MM_SHMEMPAGES or 
add new item.

Update the corresponding item in mm_rss_stat always.
If so, we can control the status update accurately. 
What do you think about that?
And is there any side-effect for this approach?


Thanks
Roger(Hongbo.He)

-Original Message-
From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On Behalf Of 
Andrey Grodzovsky
Sent: Friday, January 19, 2018 12:48 AM
To: linux-ker...@vger.kernel.org; linux...@kvack.org; 
dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Cc: Koenig, Christian 
Subject: [RFC] Per file OOM badness

Hi, this series is a revised version of an RFC sent by Christian König a few 
years ago. The original RFC can be found at 
https://lists.freedesktop.org/archives/dri-devel/2015-September/089778.html

This is the same idea and I've just adressed his concern from the original RFC 
and switched to a callback into file_ops instead of a new member in struct file.

Thanks,
Andrey

___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH v2 1/2] drm/ttm: Allow page allocations w/o triggering OOM..

2018-01-16 Thread He, Roger

Reviewed-by: Roger He <hongbo...@amd.com>

Thanks
Roger(Hongbo.He)
-Original Message-
From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On Behalf Of 
Andrey Grodzovsky
Sent: Tuesday, January 16, 2018 11:18 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Koenig, 
Christian <christian.koe...@amd.com>
Cc: He, Roger <hongbo...@amd.com>
Subject: [PATCH v2 1/2] drm/ttm: Allow page allocations w/o triggering OOM..

This to allow drivers to choose to avoid OOM invocation and handle page 
allocation failures instead.

v2:
Remove extra new lines.

Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c |  3 +++
 drivers/gpu/drm/ttm/ttm_page_alloc.c |  6 ++
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c |  3 +++
 drivers/gpu/drm/ttm/ttm_tt.c | 13 +++--
 include/drm/ttm/ttm_bo_driver.h  |  4 
 5 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
2eb71ff..f32aab1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -234,6 +234,9 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
bool zero_alloc)
if (bdev->need_dma32)
page_flags |= TTM_PAGE_FLAG_DMA32;
 
+   if (bdev->no_retry)
+   page_flags |= TTM_PAGE_FLAG_NO_RETRY;
+
switch (bo->type) {
case ttm_bo_type_device:
if (zero_alloc)
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 0eab24e..f34c843 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -741,6 +741,9 @@ static int ttm_page_pool_get_pages(struct ttm_page_pool 
*pool,
if (ttm_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
gfp_flags |= __GFP_ZERO;
 
+   if (ttm_flags & TTM_PAGE_FLAG_NO_RETRY)
+   gfp_flags |= __GFP_RETRY_MAYFAIL;
+
/* ttm_alloc_new_pages doesn't reference pool so we can run
 * multiple requests in parallel.
 **/
@@ -893,6 +896,9 @@ static int ttm_get_pages(struct page **pages, unsigned 
npages, int flags,
if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
gfp_flags |= __GFP_ZERO;
 
+   if (flags & TTM_PAGE_FLAG_NO_RETRY)
+   gfp_flags |= __GFP_RETRY_MAYFAIL;
+
if (flags & TTM_PAGE_FLAG_DMA32)
gfp_flags |= GFP_DMA32;
else
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index c7f01a4..6949ef7 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -920,6 +920,9 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt 
*ttm_dma, bool huge)
gfp_flags &= ~__GFP_COMP;
}
 
+   if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY)
+   gfp_flags |= __GFP_RETRY_MAYFAIL;
+
return gfp_flags;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 
5a046a3..9e4d43d 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -301,7 +301,11 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
swap_space = swap_storage->f_mapping;
 
for (i = 0; i < ttm->num_pages; ++i) {
-   from_page = shmem_read_mapping_page(swap_space, i);
+   gfp_t gfp_mask = mapping_gfp_mask(swap_space);
+
+   gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ? 
__GFP_RETRY_MAYFAIL : 0);
+   from_page = shmem_read_mapping_page_gfp(swap_space, i, 
gfp_mask);
+
if (IS_ERR(from_page)) {
ret = PTR_ERR(from_page);
goto out_err;
@@ -350,10 +354,15 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file 
*persistent_swap_storage)
swap_space = swap_storage->f_mapping;
 
for (i = 0; i < ttm->num_pages; ++i) {
+   gfp_t gfp_mask = mapping_gfp_mask(swap_space);
+
+   gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ? 
+__GFP_RETRY_MAYFAIL : 0);
+
from_page = ttm->pages[i];
if (unlikely(from_page == NULL))
continue;
-   to_page = shmem_read_mapping_page(swap_space, i);
+
+   to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask);
if (IS_ERR(to_page)) {
ret = PTR_ERR(to_page);
goto out_err;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h 
index 94064b1..9b417eb 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -86,6 +86,7 @@ struct ttm_backend_func {
 #define TTM_PAGE_FLAG

RE: [PATCH 2/2] drm/amdgpu: Use new TTM flag to avoid OOM triggering.

2018-01-16 Thread He, Roger


-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Tuesday, January 16, 2018 8:46 PM
To: Grodzovsky, Andrey <andrey.grodzov...@amd.com>; Koenig, Christian 
<christian.koe...@amd.com>; He, Roger <hongbo...@amd.com>; 
dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu: Use new TTM flag to avoid OOM triggering.

Am 16.01.2018 um 13:43 schrieb Andrey Grodzovsky:
>
>
> On 01/16/2018 03:54 AM, Christian König wrote:
>> Am 16.01.2018 um 07:18 schrieb He, Roger:
>>> -Original Message-
>>> From: Andrey Grodzovsky [mailto:andrey.grodzov...@amd.com]
>>> Sent: Saturday, January 13, 2018 6:29 AM
>>> To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
>>> Cc: Koenig, Christian <christian.koe...@amd.com>; He, Roger 
>>> <hongbo...@amd.com>; Grodzovsky, Andrey <andrey.grodzov...@amd.com>
>>> Subject: [PATCH 2/2] drm/amdgpu: Use new TTM flag to avoid OOM 
>>> triggering.
>>>
>>> This to have a load time option to avoid OOM on RAM allocations.
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 4 
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 
>>>   3 files changed, 9 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index b7c181e..1387239 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -127,6 +127,7 @@ extern int amdgpu_job_hang_limit;  extern int 
>>> amdgpu_lbpw;  extern int amdgpu_compute_multipipe;  extern int 
>>> amdgpu_gpu_recovery;
>>> +extern int amdgpu_alloc_no_oom;
>>>     #ifdef CONFIG_DRM_AMDGPU_SI
>>>   extern int amdgpu_si_support;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index d96f9ac..6e98189 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -130,6 +130,7 @@ int amdgpu_job_hang_limit = 0;  int amdgpu_lbpw 
>>> = -1;  int amdgpu_compute_multipipe = -1;  int amdgpu_gpu_recovery = 
>>> -1; /* auto */
>>> +int amdgpu_alloc_no_oom = -1; /* auto */
>>>
>>> How about turn it on as default?
>>
>> I think we can even go a step further, drop the module parameter and 
>> just turn it always on for amdgpu.
>>
>> Christian.
>
> Will fix, just a reminder that Roger's patches - [PATCH 1/2] drm/ttm: 
> don't update global memory count for some special cases [PATCH 2/2] 
> drm/ttm: only free pages rather than update global memory count 
> together
>
> Needs to be merged before my patches since the fix a TTM bug on 
> allocation failure.

The second is merged, but I had some comments on the first and Roger 
hasn't replied yet.

Roger what's the status on that one?

Already fixed locally, but not tested yet.  Try to send out today.

Thanks
Roger(Hongbo.He)

>
> Thanks,
> Andrey
>
>>
>>>
>>> Thanks
>>> Roger(Hongbo.He)
>>>
>>> MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in 
>>> megabytes");  module_param_named(vramlimit, amdgpu_vram_limit, int, 
>>> 0600); @@ -285,6 +286,9 @@ module_param_named(compute_multipipe,
>>> amdgpu_compute_multipipe, int, 0444); MODULE_PARM_DESC(gpu_recovery, 
>>> "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = 
>>> auto"); module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 
>>> 0444);
>>>   +MODULE_PARM_DESC(alloc_no_oom, "Allocate RAM without triggering 
>>> OOM
>>> +killer, (1 = enable, 0 = disable, -1 = auto"); 
>>> +module_param_named(alloc_no_oom, amdgpu_alloc_no_oom, int, 0444);
>>> +
>>>   #ifdef CONFIG_DRM_AMDGPU_SI
>>>     #if defined(CONFIG_DRM_RADEON) ||
>>> defined(CONFIG_DRM_RADEON_MODULE) diff --git 
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index 5c4c3e0..fc27164 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -420,6 +420,10 @@ static int amdgpu_bo_do_create(struct 
>>> amdgpu_device *adev,  #endif
>>>     bo->tbo.bdev = >mman.bdev;
>>> +
>>> +    if (amdgpu_alloc_no_oom == 1)
>>> +    bo->tbo.bdev->no_retry = true;
>>> +
>>>   amdgpu_ttm_placement_from_domain(bo, domain);
>>>     r = ttm_bo_init_reserved(>mman.bdev, >tbo, size, 
>>> type,
>>> --
>>> 2.7.4
>>>
>>> ___
>>> dri-devel mailing list
>>> dri-de...@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu: Use new TTM flag to avoid OOM triggering.

2018-01-15 Thread He, Roger

-Original Message-
From: Andrey Grodzovsky [mailto:andrey.grodzov...@amd.com] 
Sent: Saturday, January 13, 2018 6:29 AM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Cc: Koenig, Christian <christian.koe...@amd.com>; He, Roger 
<hongbo...@amd.com>; Grodzovsky, Andrey <andrey.grodzov...@amd.com>
Subject: [PATCH 2/2] drm/amdgpu: Use new TTM flag to avoid OOM triggering.

This to have a load time option to avoid OOM on RAM allocations.

Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 
 3 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b7c181e..1387239 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -127,6 +127,7 @@ extern int amdgpu_job_hang_limit;  extern int amdgpu_lbpw;  
extern int amdgpu_compute_multipipe;  extern int amdgpu_gpu_recovery;
+extern int amdgpu_alloc_no_oom;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d96f9ac..6e98189 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -130,6 +130,7 @@ int amdgpu_job_hang_limit = 0;  int amdgpu_lbpw = -1;  int 
amdgpu_compute_multipipe = -1;  int amdgpu_gpu_recovery = -1; /* auto */
+int amdgpu_alloc_no_oom = -1; /* auto */

How about turn it on as default?

Thanks
Roger(Hongbo.He)

MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");  
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -285,6 +286,9 
@@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);  
MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = 
disable, -1 = auto");  module_param_named(gpu_recovery, amdgpu_gpu_recovery, 
int, 0444);
 
+MODULE_PARM_DESC(alloc_no_oom, "Allocate RAM without triggering OOM 
+killer, (1 = enable, 0 = disable, -1 = auto"); 
+module_param_named(alloc_no_oom, amdgpu_alloc_no_oom, int, 0444);
+
 #ifdef CONFIG_DRM_AMDGPU_SI
 
 #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5c4c3e0..fc27164 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -420,6 +420,10 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, 
 #endif
 
bo->tbo.bdev = >mman.bdev;
+
+   if (amdgpu_alloc_no_oom == 1)
+   bo->tbo.bdev->no_retry = true;
+
amdgpu_ttm_placement_from_domain(bo, domain);
 
r = ttm_bo_init_reserved(>mman.bdev, >tbo, size, type,
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/2] drm/ttm: Allow page allocations w/o triggering OOM..

2018-01-15 Thread He, Roger


-Original Message-
From: Andrey Grodzovsky [mailto:andrey.grodzov...@amd.com] 
Sent: Saturday, January 13, 2018 6:29 AM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Cc: Koenig, Christian <christian.koe...@amd.com>; He, Roger 
<hongbo...@amd.com>; Grodzovsky, Andrey <andrey.grodzov...@amd.com>
Subject: [PATCH 1/2] drm/ttm: Allow page allocations w/o triggering OOM..

This to allow drivers to choose to avoid OOM invocation and handle page 
allocation failures instead.

Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c |  3 +++
 drivers/gpu/drm/ttm/ttm_page_alloc.c |  6 ++
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c |  3 +++
 drivers/gpu/drm/ttm/ttm_tt.c | 13 +++--
 include/drm/ttm/ttm_bo_api.h |  1 +
 include/drm/ttm/ttm_bo_driver.h  |  4 
 6 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
2eb71ff..f32aab1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -234,6 +234,9 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
bool zero_alloc)
if (bdev->need_dma32)
page_flags |= TTM_PAGE_FLAG_DMA32;
 
+   if (bdev->no_retry)
+   page_flags |= TTM_PAGE_FLAG_NO_RETRY;
+
switch (bo->type) {
case ttm_bo_type_device:
if (zero_alloc)
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 0eab24e..f34c843 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -741,6 +741,9 @@ static int ttm_page_pool_get_pages(struct ttm_page_pool 
*pool,
if (ttm_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
gfp_flags |= __GFP_ZERO;
 
+   if (ttm_flags & TTM_PAGE_FLAG_NO_RETRY)
+   gfp_flags |= __GFP_RETRY_MAYFAIL;
+
/* ttm_alloc_new_pages doesn't reference pool so we can run
 * multiple requests in parallel.
 **/
@@ -893,6 +896,9 @@ static int ttm_get_pages(struct page **pages, unsigned 
npages, int flags,
if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
gfp_flags |= __GFP_ZERO;
 
+   if (flags & TTM_PAGE_FLAG_NO_RETRY)
+   gfp_flags |= __GFP_RETRY_MAYFAIL;
+
if (flags & TTM_PAGE_FLAG_DMA32)
gfp_flags |= GFP_DMA32;
else
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index c7f01a4..6949ef7 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -920,6 +920,9 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt 
*ttm_dma, bool huge)
gfp_flags &= ~__GFP_COMP;
}
 
+   if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY)
+   gfp_flags |= __GFP_RETRY_MAYFAIL;
+
return gfp_flags;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 
5a046a3..9e4d43d 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -301,7 +301,11 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
swap_space = swap_storage->f_mapping;
 
for (i = 0; i < ttm->num_pages; ++i) {
-   from_page = shmem_read_mapping_page(swap_space, i);
+   gfp_t gfp_mask = mapping_gfp_mask(swap_space);
+
+   gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ? 
__GFP_RETRY_MAYFAIL : 0);
+   from_page = shmem_read_mapping_page_gfp(swap_space, i, 
gfp_mask);
+
if (IS_ERR(from_page)) {
ret = PTR_ERR(from_page);
goto out_err;
@@ -350,10 +354,15 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file 
*persistent_swap_storage)
swap_space = swap_storage->f_mapping;
 
for (i = 0; i < ttm->num_pages; ++i) {
+   gfp_t gfp_mask = mapping_gfp_mask(swap_space);
+
+   gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ? 
+__GFP_RETRY_MAYFAIL : 0);
+
from_page = ttm->pages[i];
if (unlikely(from_page == NULL))
continue;
-   to_page = shmem_read_mapping_page(swap_space, i);
+
+   to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask);
if (IS_ERR(to_page)) {
ret = PTR_ERR(to_page);
goto out_err;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 
2cd025c..099f24b 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -176,6 +176,7 @@ struct ttm_buffer_object {
unsigned long num_pages;
size_t acc_size;
 
+
Please 

RE: [PATCH] drm/amdgpu: optimize moved handling only when vm_debug is inactive

2018-01-03 Thread He, Roger
Fix my concern as well.

Reviewed-by: Roger He 

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Wednesday, January 03, 2018 8:37 PM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH] drm/amdgpu: optimize moved handling only when vm_debug is 
inactive

Otherwise we would completely circumvent that debugging feature.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 81505870eebc..cd1752b6afa9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1685,7 +1685,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
if (resv == vm->root.base.bo->tbo.resv)
clear = false;
/* Try to reserve the BO to avoid clearing its ptes */
-   else if (reservation_object_trylock(resv))
+   else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
clear = false;
/* Somebody else is using the BO right now */
else
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/amdgpu: set gtt size according to system memory size only

2018-01-02 Thread He, Roger
Original Message-
From: Michel Dänzer [mailto:mic...@daenzer.net] 
Sent: Wednesday, December 27, 2017 4:58 PM
To: He, Roger <hongbo...@amd.com>; Koenig, Christian 
<christian.koe...@amd.com>; Grodzovsky, Andrey <andrey.grodzov...@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amd/amdgpu: set gtt size according to system memory 
size only

On 2017-12-25 09:45 AM, He, Roger wrote:
> 
> Could you tell me how to duplicate this issue?  Maybe now I can look into it.

 piglit run -x glx-multithread-texture --process-isolation false gpu 
results/gpu.


on my side, not work with above command:

root@jenkins-MS-7984:/home/jenkins/roger/piglit.0902.release# ./piglit run -x 
glx-multithread-texture --process-isolation false gpu results/gpu
usage: piglit [-h] [-f CONFIG_FILE] [-n ] [-d] [-t ]
  [-x ] [-b {junit,json}] [-c | -1]
  [-p {glx,x11_egl,wayland,gbm,mixed_glx_egl}] [--valgrind]
  [--dmesg] [--abort-on-monitored-error] [-s]
  [--junit_suffix JUNIT_SUFFIX] [--junit-subtests]
  [-v | -l {quiet,verbose,dummy,http}] [--test-list TEST_LIST]
  [-o] [--deqp-mustpass-list]
   [ ...] 
piglit: error: unrecognized arguments: --process-isolation

is it because my piglit is too old?
Or any other command can duplicate the failure as well?


Thanks
Roger(Hongbo.He)
-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/amdgpu: set gtt size according to system memory size only

2017-12-27 Thread He, Roger
Got it, thank you!

-Original Message-
From: Michel Dänzer [mailto:mic...@daenzer.net] 
Sent: Wednesday, December 27, 2017 4:58 PM
To: He, Roger <hongbo...@amd.com>; Koenig, Christian 
<christian.koe...@amd.com>; Grodzovsky, Andrey <andrey.grodzov...@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amd/amdgpu: set gtt size according to system memory 
size only

On 2017-12-25 09:45 AM, He, Roger wrote:
> 
> Could you tell me how to duplicate this issue?  Maybe now I can look into it.

It happened for me running

 piglit run -x glx-multithread-texture --process-isolation false gpu 
results/gpu.

using the Mesa radeonsi driver on Tonga in a Ryzen 7 1700 (8 cores, 16
threads) system with 16 GB of RAM.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/amdgpu: set gtt size according to system memory size only

2017-12-25 Thread He, Roger

-Original Message-
From: Michel Dänzer [mailto:mic...@daenzer.net] 
Sent: Monday, December 11, 2017 7:29 PM
To: He, Roger <hongbo...@amd.com>; Koenig, Christian <christian.koe...@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amd/amdgpu: set gtt size according to system memory 
size only

On 2017-12-11 03:49 AM, He, Roger wrote:
>> From: Michel Dänzer [mailto:mic...@daenzer.net] On 2017-12-07 07:07 
>> PM, Christian König wrote:
>>> 
>>> I think I know what is going on here. The max-texture-size keeps  
>>> increasing the texture size as long as it doesn't fails to allocate 
>>> one.
>>> 
>>> So the "Out of kernel memory" message is actually the desired effect 
>>> (but we should probably remove the message).
>>> 
>>> The price question is what happens after that? Those code paths are 
>>> probably not very well tested.
>> 
>> I'm attaching all the related dmesg output I've captured.
>> Basically, best case is the OOM reaper killing piglit, worst case is 
>> no response to SSH => hard reboot.

BTW, even when the machine stays responsive via SSH, the GPU hangs.


>> Until this becomes more robust, this change should probably be 
>> reverted.
> 
> I have not got any valuable info from error log. Please revert it now 
> as quick solution, later let me check the reason.

I'm busy with other stuff, can you revert it?

Sorry for until now noticing your mail. And thanks Grodzovsky.Andrey for doing 
that.

Could you tell me how to duplicate this issue?  Maybe now I can look into it.

Thanks
Roger(Hongbo.He)





-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/ttm: drop the spin in delayed delete if the trylock doesn't work

2017-12-21 Thread He, Roger

Reviewed-by: Roger He 

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Friday, December 22, 2017 2:06 AM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Subject: [PATCH] drm/ttm: drop the spin in delayed delete if the trylock 
doesn't work

Thomas actually noticed that, but I didn't realized what he meant until now.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
60bb5c12b568..84dfa2368a72 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -592,6 +592,8 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device 
*bdev, bool remove_all)
 
} else if (reservation_object_trylock(bo->resv)) {
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
+   } else {
+   spin_unlock(>lru_lock);
}
 
kref_put(>list_kref, ttm_bo_release_list);
--
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/7] drm/ttm: call ttm_bo_swapout directly when ttm shrink

2017-12-21 Thread He, Roger
Hi Thomas:

Also I wonder what testing is being performed on these changes prior to 
submission?

Vulkan CTS test with Per VM BO enabled.
After that, command submission will not need to provide the BO list it will use. This is
helpful for performance in CPU-bound games.

The reason why we enable eviction and swapout here is that the test always
over-allocates.

Thanks
Roger(Hongbo.He)
-Original Message-
From: Thomas Hellstrom [mailto:tho...@shipmail.org] 
Sent: Thursday, December 21, 2017 3:34 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 1/7] drm/ttm: call ttm_bo_swapout directly when ttm shrink

Roger,

5 out of 7 patches in this series are completely lacking a commit log message, 
and thats not OK. Really.

https://www.kernel.org/doc/html/v4.12/process/submitting-patches.html#describe-your-changes

I'll review these, but IIRC the no_wait in the memory accounting code is 
different in that it doesn't allow sleeping, whereas in the bo code we had 
originally had no_wait_gpu and also no_wait. (IIRC Maarten or Jerome removed 
the latter due to lack of
users.) Seems like these patches confuse the to. At the very least that 
requires some form of motivation.

Also I wonder what testing is being performed on these changes prior to 
submission?
We have pretty high number of critical deployments out there, that we need to 
support.

/Thomas


On 12/20/2017 11:34 AM, Roger He wrote:
> then remove superfluous functions
>
> Change-Id: Iea020f0e30a239e0265e7a1500168c7d7f819bd9
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 21 +++-
>   drivers/gpu/drm/ttm/ttm_memory.c | 12 ++-
>   include/drm/ttm/ttm_bo_api.h |  1 +
>   include/drm/ttm/ttm_bo_driver.h  |  1 -
>   include/drm/ttm/ttm_memory.h | 69 
> +---
>   5 files changed, 9 insertions(+), 95 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c 
> b/drivers/gpu/drm/ttm/ttm_bo.c index 60bb5c1..fa57aa8 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -42,7 +42,6 @@
>   #include 
>   #include 
>   
> -static int ttm_bo_swapout(struct ttm_mem_shrink *shrink);
>   static void ttm_bo_global_kobj_release(struct kobject *kobj);
>   
>   static struct attribute ttm_bo_count = { @@ -1454,7 +1453,6 @@ 
> static void ttm_bo_global_kobj_release(struct kobject *kobj)
>   struct ttm_bo_global *glob =
>   container_of(kobj, struct ttm_bo_global, kobj);
>   
> - ttm_mem_unregister_shrink(glob->mem_glob, >shrink);
>   __free_page(glob->dummy_read_page);
>   kfree(glob);
>   }
> @@ -1479,6 +1477,7 @@ int ttm_bo_global_init(struct drm_global_reference *ref)
>   mutex_init(>device_list_mutex);
>   spin_lock_init(>lru_lock);
>   glob->mem_glob = bo_ref->mem_glob;
> + glob->mem_glob->bo_glob = glob;
>   glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>   
>   if (unlikely(glob->dummy_read_page == NULL)) { @@ -1489,14 +1488,6 
> @@ int ttm_bo_global_init(struct drm_global_reference *ref)
>   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>   INIT_LIST_HEAD(>swap_lru[i]);
>   INIT_LIST_HEAD(>device_list);
> -
> - ttm_mem_init_shrink(>shrink, ttm_bo_swapout);
> - ret = ttm_mem_register_shrink(glob->mem_glob, >shrink);
> - if (unlikely(ret != 0)) {
> - pr_err("Could not register buffer object swapout\n");
> - goto out_no_shrink;
> - }
> -
>   atomic_set(>bo_count, 0);
>   
>   ret = kobject_init_and_add(
> @@ -1504,8 +1495,6 @@ int ttm_bo_global_init(struct drm_global_reference *ref)
>   if (unlikely(ret != 0))
>   kobject_put(>kobj);
>   return ret;
> -out_no_shrink:
> - __free_page(glob->dummy_read_page);
>   out_no_drp:
>   kfree(glob);
>   return ret;
> @@ -1688,11 +1677,8 @@ EXPORT_SYMBOL(ttm_bo_synccpu_write_release);
>* A buffer object shrink method that tries to swap out the first
>* buffer object on the bo_global::swap_lru list.
>*/
> -
> -static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
> +int ttm_bo_swapout(struct ttm_bo_global *glob)
>   {
> - struct ttm_bo_global *glob =
> - container_of(shrink, struct ttm_bo_global, shrink);
>   struct ttm_buffer_object *bo;
>   int ret = -EBUSY;
>   unsigned i;
> @@ -1774,10 +1760,11 @@ static int ttm_bo_swapout(struct ttm_mem_shrink 
> *shrink)
>   kref_put(>list_kref, ttm_bo_release_list);
>   return ret;
>   }
> +EXPORT_SYMBOL(ttm_bo_swapout);
>   
>   void ttm_bo_swapout_al

RE: [PATCH 3/7] drm/ttm: use an operation ctx for ttm_mem_global_alloc_page

2017-12-20 Thread He, Roger


-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Wednesday, December 20, 2017 9:36 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 3/7] drm/ttm: use an operation ctx for 
ttm_mem_global_alloc_page

Commit message!

Am 20.12.2017 um 11:34 schrieb Roger He:
> Change-Id: I4104a12e09a374b6477a0dd5a8fce26dce27a746
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_memory.c | 15 ---
>   drivers/gpu/drm/ttm/ttm_page_alloc.c |  6 +-
>   drivers/gpu/drm/ttm/ttm_page_alloc_dma.c |  8 ++--
>   include/drm/ttm/ttm_memory.h |  3 ++-
>   4 files changed, 21 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_memory.c 
> b/drivers/gpu/drm/ttm/ttm_memory.c
> index 525d3b6..8df0755 100644
> --- a/drivers/gpu/drm/ttm/ttm_memory.c
> +++ b/drivers/gpu/drm/ttm/ttm_memory.c
> @@ -539,15 +539,14 @@ int ttm_mem_global_alloc(struct ttm_mem_global *glob, 
> uint64_t memory,
>   EXPORT_SYMBOL(ttm_mem_global_alloc);
>   
>   int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
> -   struct page *page, uint64_t size)
> +   struct page *page, uint64_t size,
> +   struct ttm_operation_ctx *ctx)
>   {
> -
> + int ret;
>   struct ttm_mem_zone *zone = NULL;
> - struct ttm_operation_ctx ctx = {
> - .interruptible = false,
> - .no_wait_gpu = false
> - };
> + bool tmp_no_wait_gpu = ctx->no_wait_gpu;

Mhm, please drop that. That the function might wait for the GPU even 
when the caller requested not to do so sounds like a bug to me.

Yes. I will remove this. Later I will send new patches. Would appreciate it if
Thomas can help to verify that.

Thanks
Roger(Hongbo.He)  
>   
> + ctx->no_wait_gpu = false;
>   /**
>* Page allocations may be registed in a single zone
>* only if highmem or !dma32.
> @@ -560,7 +559,9 @@ int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
>   if (glob->zone_dma32 && page_to_pfn(page) > 0x0010UL)
>   zone = glob->zone_kernel;
>   #endif
> - return ttm_mem_global_alloc_zone(glob, zone, size, );
> + ret = ttm_mem_global_alloc_zone(glob, zone, size, ctx);
> + ctx->no_wait_gpu = tmp_no_wait_gpu;
> + return ret;
>   }
>   
>   void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct 
> page *page, diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
> b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> index b5ba644..8f93ff3 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> @@ -1061,6 +1061,10 @@ void ttm_page_alloc_fini(void)
>   int ttm_pool_populate(struct ttm_tt *ttm)
>   {
>   struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
> + struct ttm_operation_ctx ctx = {
> + .interruptible = false,
> + .no_wait_gpu = false
> + };
>   unsigned i;
>   int ret;
>   
> @@ -1076,7 +1080,7 @@ int ttm_pool_populate(struct ttm_tt *ttm)
>   
>   for (i = 0; i < ttm->num_pages; ++i) {
>   ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
> - PAGE_SIZE);
> + PAGE_SIZE, );
>   if (unlikely(ret != 0)) {
>   ttm_pool_unpopulate(ttm);
>   return -ENOMEM;
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c 
> b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> index bda00b2..8aac86a 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> @@ -927,6 +927,10 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct 
> device *dev)
>   {
>   struct ttm_tt *ttm = _dma->ttm;
>   struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
> + struct ttm_operation_ctx ctx = {
> + .interruptible = false,
> + .no_wait_gpu = false
> + };
>   unsigned long num_pages = ttm->num_pages;
>   struct dma_pool *pool;
>   enum pool_type type;
> @@ -962,7 +966,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct 
> device *dev)
>   break;
>   
>   ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
> - pool->size);
> + pool->size, );
>   if (unlikely(ret != 0)) {
>   ttm_dma_unpopula

RE: [PATCH] Revert "drm/amd/amdgpu: set gtt size according to system memory size only"

2017-12-17 Thread He, Roger

Reviewed-by: Roger He <hongbo...@amd.com>

Thanks
Roger(Hongbo.He)
-Original Message-
From: Andrey Grodzovsky [mailto:andrey.grodzov...@amd.com] 
Sent: Saturday, December 16, 2017 3:10 AM
To: amd-gfx@lists.freedesktop.org
Cc: Koenig, Christian <christian.koe...@amd.com>; Daenzer, Michel 
<michel.daen...@amd.com>; He, Roger <hongbo...@amd.com>; Grodzovsky, Andrey 
<andrey.grodzov...@amd.com>
Subject: [PATCH] Revert "drm/amd/amdgpu: set gtt size according to system 
memory size only"

This reverts commit ba851eed895c76be0eb4260bdbeb7e26f9ccfaa2.
With that change piglit max size tests (running with -t max.*size) are causing 
OOM and hard hang on my CZ with 1GB RAM.

Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c307a7d..814a9c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1329,9 +1329,11 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
struct sysinfo si;
 
si_meminfo();
-   gtt_size = max(AMDGPU_DEFAULT_GTT_SIZE_MB << 20,
-   (uint64_t)si.totalram * si.mem_unit * 3/4);
-   } else
+   gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+  adev->mc.mc_vram_size),
+  ((uint64_t)si.totalram * si.mem_unit * 3/4));
+   }
+   else
gtt_size = (uint64_t)amdgpu_gtt_size << 20;
r = ttm_bo_init_mm(>mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
if (r) {
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/ttm: enable eviction for Per-VM-BO

2017-12-15 Thread He, Roger
-Original Message-
From: Thomas Hellstrom [mailto:tho...@shipmail.org] 
Sent: Friday, December 15, 2017 2:25 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Cc: Koenig, Christian <christian.koe...@amd.com>
Subject: Re: [PATCH] drm/ttm: enable eviction for Per-VM-BO

Hi.

On 12/14/2017 09:10 AM, Roger He wrote:
> Change-Id: I0c6ece0decd18d30ccc94e5c7ca106d351941c62
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 11 +--
>   1 file changed, 5 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c 
> b/drivers/gpu/drm/ttm/ttm_bo.c index 098b22e..ba5b486 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -707,7 +707,6 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object 
> *bo,
>   EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>   
>   static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> -struct reservation_object *resv,
>  uint32_t mem_type,
>  const struct ttm_place *place,
>  struct ttm_operation_ctx *ctx) @@ -722,8 +721,9 
> @@ static 
> int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>   spin_lock(>lru_lock);
>   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>   list_for_each_entry(bo, >lru[i], lru) {
> - if (bo->resv == resv) {
> - if (list_empty(>ddestroy))
> + if (bo->resv == ctx->resv) {
> + if (!ctx->allow_reserved_eviction &&
> + list_empty(>ddestroy))
>   continue;

It looks to me like a value of locked==true could leak through from a
previous loop iteration here, so that locked ends up being true if
(bo->resv == resv && ctx->allow_reserved_eviction), even if no locking was
taking place. Perhaps re-initialize locked to false on each loop
iteration?

At  the beginning, I also have this concern about incorrect using of locked. 
So, add patch a few days ago.

init locked again to prevent incorrect unlock
does it help?


Thanks
Roger(Hongbo.He)

/Thomas

>   } else {
>   locked = reservation_object_trylock(bo->resv);
> @@ -835,7 +835,7 @@ static int ttm_bo_mem_force_space(struct 
> ttm_buffer_object *bo,
>   return ret;
>   if (mem->mm_node)
>   break;
> - ret = ttm_mem_evict_first(bdev, bo->resv, mem_type, place, ctx);
> + ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
>   if (unlikely(ret != 0))
>   return ret;
>   } while (1);
> @@ -1332,8 +1332,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device 
> *bdev,
>   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>   while (!list_empty(>lru[i])) {
>   spin_unlock(>lru_lock);
> - ret = ttm_mem_evict_first(bdev, NULL, mem_type,
> -   NULL, );
> + ret = ttm_mem_evict_first(bdev, mem_type, NULL, );
>   if (ret)
>   return ret;
>   spin_lock(>lru_lock);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/6] drm/ttm: add on_alloc_stage and reservation into ttm_operation_ctx

2017-12-14 Thread He, Roger
Hi Thomas:

Really sorry for that and will keep that in mind.
If necessary, next time I will send cover letter to provide more background and 
details.


Thanks
Roger(Hongbo.He)

-Original Message-
From: Thomas Hellstrom [mailto:tho...@shipmail.org] 
Sent: Thursday, December 14, 2017 3:21 AM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 1/6] drm/ttm: add on_alloc_stage and reservation into 
ttm_operation_ctx

Hi,

I think this series is quite poorly documented. We should have a log message 
explaining the purpose of the commit.
Also since it's not obvious what the series is attempting to achieve, please 
add a 0/X series header message..

/Thomas


On 12/12/2017 10:33 AM, Roger He wrote:
> on_alloc_stage: is this operation on allocation stage
> resv: reservation bo used of this operation
>
> Change-Id: I01ea482e8c7470014196eb218e2ff8913306eef0
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   include/drm/ttm/ttm_bo_api.h | 4 
>   1 file changed, 4 insertions(+)
>
> diff --git a/include/drm/ttm/ttm_bo_api.h 
> b/include/drm/ttm/ttm_bo_api.h index 368eb02..25de597 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -263,6 +263,8 @@ struct ttm_bo_kmap_obj {
>*
>* @interruptible: Sleep interruptible if sleeping.
>* @no_wait_gpu: Return immediately if the GPU is busy.
> + * @on_alloc_stage: is this operation on allocation stage
> + * @resv: resvation bo used
>*
>* Context for TTM operations like changing buffer placement or general 
> memory
>* allocation.
> @@ -270,6 +272,8 @@ struct ttm_bo_kmap_obj {
>   struct ttm_operation_ctx {
>   bool interruptible;
>   bool no_wait_gpu;
> + bool on_alloc_stage;
> + struct reservation_object *resv;
>   uint64_t bytes_moved;
>   };
>   


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 5/6] drm/ttm: enable eviction for Per-VM-BO

2017-12-12 Thread He, Roger
-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Tuesday, December 12, 2017 6:40 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 5/6] drm/ttm: enable eviction for Per-VM-BO

Am 12.12.2017 um 10:33 schrieb Roger He:
> Change-Id: I491d4ceb8c98bb3d8e6e0ddef2330284ce2fe5f6
> Signed-off-by: Roger He <hongbo...@amd.com>

I would squash this one with patch #6.

> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 7 +++
>   1 file changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c 
> b/drivers/gpu/drm/ttm/ttm_bo.c index eb8c568..22b6ca5 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -722,10 +722,9 @@ static int ttm_mem_evict_first(struct ttm_bo_device 
> *bdev,
>   spin_lock(>lru_lock);
>   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>   list_for_each_entry(bo, >lru[i], lru) {
> - if (bo->resv == resv) {
> - if (list_empty(>ddestroy))
> - continue;
> - } else {
> + if (!ctx ||
> + !(ctx->on_alloc_stage &&
> + bo->resv == ctx->resv)) {

Coding style: The lines starting with "!(ctx" and "bo->resv" are too far 
to the right.

Additional to that I think ctx is mandatory and doesn't need a check 
(but I might be wrong). If it isn't, it's probably time to make it mandatory.

And I would use (ctx->on_alloc_stage || list_empty(>ddestroy)) as 
the check; we probably still want to be able to handle deleted BOs here 
during CS.

Change that as below, I think it can cover the case you described. No matter 
the Bo is in deleted list or not.
if (!ctx->allow_reserved_eviction || bo->resv != ctx->resv) {
locked = 
kcl_reservation_object_trylock(bo->resv);
if (!locked)
continue;
}

Thanks
Roger(Hongbo.He)



Christian.

>   locked = reservation_object_trylock(bo->resv);
>   if (!locked)
>   continue;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 4/6] drm/ttm: init locked again to prevent incorrect unlock

2017-12-12 Thread He, Roger
That is a bug fix, isn't it? If yes maybe add CC:stable and commit it 
first before all other patches.

Fortunately so far there is no issue directly resulted from that.

Thanks
Roger(Hongbo.He)

-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Tuesday, December 12, 2017 6:37 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 4/6] drm/ttm: init locked again to prevent incorrect unlock

Am 12.12.2017 um 10:33 schrieb Roger He:
> Change-Id: I8db51d843955f5db14bb4bbff892eaedbd9f0abe
> Signed-off-by: Roger He <hongbo...@amd.com>

Reviewed-by: Christian König <christian.koe...@amd.com>

That is a bug fix, isn't it? If yes maybe add CC:stable and commit it first 
before all other patches.

Christian.

> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 1 +
>   1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c 
> b/drivers/gpu/drm/ttm/ttm_bo.c index 17fe8be..eb8c568 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -735,6 +735,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> place)) {
>   if (locked)
>   reservation_object_unlock(bo->resv);
> + locked = false;
>   continue;
>   }
>   break;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/6] drm/ttm: use an ttm operation ctx for ttm_bo_move_xxx

2017-12-12 Thread He, Roger
-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Tuesday, December 12, 2017 6:34 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 3/6] drm/ttm: use an ttm operation ctx for ttm_bo_move_xxx

Am 12.12.2017 um 10:33 schrieb Roger He:
> include ttm_bo_move_memcpy and ttm_bo_move_ttm
>
> Change-Id: I160b2fe1da3200405810d0215c4521b5f0d3615a
> Signed-off-by: Roger He <hongbo...@amd.com>

Reviewed-by: Christian König <christian.koe...@amd.com>

   But please separate that out and wait for a few days before 
committing, maybe some nouveau devs have objections.

Ok.

Thanks
Roger(Hongbo.He)

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +++
>   drivers/gpu/drm/nouveau/nouveau_bo.c| 7 +++
>   drivers/gpu/drm/qxl/qxl_ttm.c   | 3 +--
>   drivers/gpu/drm/radeon/radeon_ttm.c | 7 +++
>   drivers/gpu/drm/ttm/ttm_bo.c| 6 ++
>   drivers/gpu/drm/ttm/ttm_bo_util.c   | 8 
>   include/drm/ttm/ttm_bo_driver.h | 4 ++--
>   7 files changed, 18 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 7db9556..c307a7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -505,7 +505,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object 
> *bo, bool evict,
>   if (unlikely(r)) {
>   goto out_cleanup;
>   }
> - r = ttm_bo_move_ttm(bo, ctx->interruptible, ctx->no_wait_gpu, new_mem);
> + r = ttm_bo_move_ttm(bo, ctx, new_mem);
>   out_cleanup:
>   ttm_bo_mem_put(bo, _mem);
>   return r;
> @@ -536,7 +536,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object 
> *bo, bool evict,
>   if (unlikely(r)) {
>   return r;
>   }
> - r = ttm_bo_move_ttm(bo, ctx->interruptible, ctx->no_wait_gpu, _mem);
> + r = ttm_bo_move_ttm(bo, ctx, _mem);
>   if (unlikely(r)) {
>   goto out_cleanup;
>   }
> @@ -597,8 +597,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object 
> *bo, bool evict,
>   
>   if (r) {
>   memcpy:
> - r = ttm_bo_move_memcpy(bo, ctx->interruptible,
> -ctx->no_wait_gpu, new_mem);
> + r = ttm_bo_move_memcpy(bo, ctx, new_mem);
>   if (r) {
>   return r;
>   }
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
> b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index 949bf6b..6b6fb20 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -1226,7 +1226,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, 
> bool evict, bool intr,
>   if (ret)
>   goto out;
>   
> - ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, new_reg);
> + ret = ttm_bo_move_ttm(bo, , new_reg);
>   out:
>   ttm_bo_mem_put(bo, _reg);
>   return ret;
> @@ -1255,7 +1255,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, 
> bool evict, bool intr,
>   if (ret)
>   return ret;
>   
> - ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, _reg);
> + ret = ttm_bo_move_ttm(bo, , _reg);
>   if (ret)
>   goto out;
>   
> @@ -1380,8 +1380,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool 
> evict,
>   /* Fallback to software copy. */
>   ret = ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu);
>   if (ret == 0)
> - ret = ttm_bo_move_memcpy(bo, ctx->interruptible,
> -  ctx->no_wait_gpu, new_reg);
> + ret = ttm_bo_move_memcpy(bo, ctx, new_reg);
>   
>   out:
>   if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) { 
> diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c 
> b/drivers/gpu/drm/qxl/qxl_ttm.c index d866f32..78ce118 100644
> --- a/drivers/gpu/drm/qxl/qxl_ttm.c
> +++ b/drivers/gpu/drm/qxl/qxl_ttm.c
> @@ -357,8 +357,7 @@ static int qxl_bo_move(struct ttm_buffer_object *bo, bool 
> evict,
>   qxl_move_null(bo, new_mem);
>   return 0;
>   }
> - return ttm_bo_move_memcpy(bo, ctx->interruptible, ctx->no_wait_gpu,
> -   new_mem);
> + return ttm_bo_move_memcpy(bo, ctx, new_mem);
>   }
>   
>   static void qxl_bo_move_notify(struct ttm_buffer_object *bo, diff 
> --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
> b/drivers/gpu/drm/radeon/radeon_ttm.c
> index 98e30d7..557fd79 100644
> --- a/drivers/gpu/drm/radeon/ra

RE: [PATCH 1/7] drm/ttm:fix incorrect calculate on shrink_pages

2017-12-12 Thread He, Roger

Reviewed-by: Roger He 

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Monk 
Liu
Sent: Tuesday, December 12, 2017 5:47 PM
To: amd-gfx@lists.freedesktop.org
Cc: Liu, Monk 
Subject: [PATCH 1/7] drm/ttm:fix incorrect calculate on shrink_pages

shrink_pages is in unit of Order after ttm_page_pool_free, but it is used by 
nr_free in next round so need change it into native page unit

Change-Id: I33b77ac1616e24b1b881eee54c3bd7342cfa9ab8
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/ttm/ttm_page_alloc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 44343a2..71945cc 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct 
shrink_control *sc)
freed += (nr_free_pool - shrink_pages) << pool->order;
if (freed >= sc->nr_to_scan)
break;
+   shrink_pages <<= pool->order;
}
mutex_unlock();
return freed;
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/amdgpu: set gtt size according to system memory size only

2017-12-10 Thread He, Roger
-Original Message-
From: Michel Dänzer [mailto:mic...@daenzer.net] 
Sent: Saturday, December 09, 2017 1:26 AM
To: Koenig, Christian <christian.koe...@amd.com>; He, Roger <hongbo...@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amd/amdgpu: set gtt size according to system memory 
size only

On 2017-12-07 07:07 PM, Christian König wrote:
> Am 07.12.2017 um 18:34 schrieb Michel Dänzer:
>> On 2017-11-29 10:12 AM, Roger He wrote:
>>> Change-Id: Ib634375b90d875fe04a890fc82fb1e3b7112676a
>>> Signed-off-by: Roger He <hongbo...@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 +++-
>>>   1 file changed, 3 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 17bf0ce..d0661907 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -1330,11 +1330,9 @@ int amdgpu_ttm_init(struct amdgpu_device 
>>> *adev)
>>>   struct sysinfo si;
>>>     si_meminfo();
>>> -    gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
>>> -   adev->mc.mc_vram_size),
>>> -   ((uint64_t)si.totalram * si.mem_unit * 3/4));
>>> -    }
>>> -    else
>>> +    gtt_size = max(AMDGPU_DEFAULT_GTT_SIZE_MB << 20,
>>> +    (uint64_t)si.totalram * si.mem_unit * 3/4);
>>> +    } else
>>>   gtt_size = (uint64_t)amdgpu_gtt_size << 20;
>>>   r = ttm_bo_init_mm(>mman.bdev, TTM_PL_TT, gtt_size >> 
>>> PAGE_SHIFT);
>>>   if (r) {
>>>
>> I'm unable to finish a piglit run (using Mesa on Tonga in a Ryzen 7 
>> 1700 system with 16 GB of RAM) with this change. Before, I had
>>
>>   [drm] amdgpu: 3072M of GTT memory ready.
>>
>> now it's
>>
>>   [drm] amdgpu: 10473M of GTT memory ready.
>>
>> While running piglit, there's lots of
>>
>>   [TTM] Out of kernel memory
>>
>> messages, followed by more badness, and eventually the machine 
>> becomes inaccessible via SSH and has to be hard rebooted.
>>
>>
>> It occurred to me one thing not being taken into account here is that 
>> system memory is also needed for storing the contents of BOs evicted 
>> from VRAM. So I tried subtracting the VRAM size, resulting in
>>
>>   [drm] amdgpu: 8425M of GTT memory ready.
>>
>> but the problem still happened. So I tried 1/2 instead of 3/4 of RAM, 
>> resulting in
>>
>>   [drm] amdgpu: 6982M of GTT memory ready.
>>
>> and was able to finish a piglit run with that.
> 
> I think I know what is going on here. The max-texture-size keeps 
> increasing the texture size as long as it doesn't fails to allocate one.
> 
> So the "Out of kernel memory" message is actually the desired effect 
> (but we should probably remove the message).
> 
> The price question is what happens after that? Those code paths are 
> probably not very well tested.

I'm attaching all the related dmesg output I've captured. Basically, best case 
is the OOM reaper killing piglit, worst case is no response to SSH => hard 
reboot.


 Until this becomes more robust, this change should probably be 
reverted.

I have not got any valuable info from error log.
Please revert it now as quick solution, later let me check the reason. 


Thanks
Roger(Hongbo.He)

On 2017-12-08 02:52 AM, He, Roger wrote:
>  [TTM] Out of kernel memory The direct reason is GTT 
> BO swap out failure which results from more Bo allocation. And  along with 
> that need more acc_size.
> But why swap out failure I am not sure that is expected here for this case, 
> maybe need to investigate.

FWIW, though it's probably not directly related: This happens with the swap 
partition (8GB) completely unused.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/amdgpu: set gtt size according to system memory size only

2017-12-07 Thread He, Roger
 [TTM] Out of kernel memory
The direct reason is GTT BO swap out failure which results from more Bo 
allocation. And  along with that need more acc_size.
But why swap out failure I am not sure that is expected here for this case, 
maybe need to investigate.

Thanks
Roger(Hongbo.He)
-Original Message-
From: Michel Dänzer [mailto:mic...@daenzer.net] 
Sent: Friday, December 08, 2017 1:34 AM
To: He, Roger <hongbo...@amd.com>
Cc: amd-gfx@lists.freedesktop.org; Koenig, Christian <christian.koe...@amd.com>
Subject: Re: [PATCH] drm/amd/amdgpu: set gtt size according to system memory 
size only

On 2017-11-29 10:12 AM, Roger He wrote:
> Change-Id: Ib634375b90d875fe04a890fc82fb1e3b7112676a
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 +++-
>  1 file changed, 3 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 17bf0ce..d0661907 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1330,11 +1330,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>   struct sysinfo si;
>  
>   si_meminfo();
> - gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
> -adev->mc.mc_vram_size),
> -((uint64_t)si.totalram * si.mem_unit * 3/4));
> - }
> - else
> + gtt_size = max(AMDGPU_DEFAULT_GTT_SIZE_MB << 20,
> + (uint64_t)si.totalram * si.mem_unit * 3/4);
> + } else
>   gtt_size = (uint64_t)amdgpu_gtt_size << 20;
>   r = ttm_bo_init_mm(>mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
>   if (r) {
> 

I'm unable to finish a piglit run (using Mesa on Tonga in a Ryzen 7 1700 system 
with 16 GB of RAM) with this change. Before, I had

 [drm] amdgpu: 3072M of GTT memory ready.

now it's

 [drm] amdgpu: 10473M of GTT memory ready.

While running piglit, there's lots of

 [TTM] Out of kernel memory

messages, followed by more badness, and eventually the machine becomes 
inaccessible via SSH and has to be hard rebooted.


It occurred to me one thing not being taken into account here is that system 
memory is also needed for storing the contents of BOs evicted from VRAM. So I 
tried subtracting the VRAM size, resulting in

 [drm] amdgpu: 8425M of GTT memory ready.

but the problem still happened. So I tried 1/2 instead of 3/4 of RAM, resulting 
in

 [drm] amdgpu: 6982M of GTT memory ready.

and was able to finish a piglit run with that.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/ttm: swap consecutive allocated pooled pages v4

2017-12-05 Thread He, Roger
Reviewed-by: Roger He <hongbo...@amd.com>

Thanks
Roger(Hongbo.He)
-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Tuesday, December 05, 2017 8:55 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; He, Roger 
<hongbo...@amd.com>
Subject: [PATCH] drm/ttm: swap consecutive allocated pooled pages v4

When we detect consecutive allocation of pages swap them to avoid accidentally 
freeing them as huge page.

v2: use swap
v3: check if it's really the first allocated page
v4: don't touch the loop variable

Signed-off-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index b6f7ce286fb1..44343a2bf55c 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -958,8 +958,15 @@ static int ttm_get_pages(struct page **pages, unsigned 
npages, int flags,
r = ttm_page_pool_get_pages(pool, , flags, cstate,
npages - count, 0);
 
-   list_for_each_entry(p, , lru)
-   pages[count++] = p;
+   first = count;
+   list_for_each_entry(p, , lru) {
+   struct page *tmp = p;
+
+   /* Swap the pages if we detect consecutive order */
+   if (count > first && pages[count - 1] == tmp - 1)
+   swap(tmp, pages[count - 1]);
+   pages[count++] = tmp;
+   }
 
if (r) {
/* If there is any pages in the list put them back to
--
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/ttm: swap consecutive allocated pooled pages v3

2017-12-04 Thread He, Roger
Hold on,  only Patch 1 is Reviewed-by: Roger He <hongbo...@amd.com>.

For Patch 2:

+   list_for_each_entry(p, , lru) {
+   /* Swap the pages if we detect consecutive order */
+   if (count > first && pages[count - 1] == p - 1) {   

+   swap(p, pages[count - 1]);// swap seems breaking 
the plist, after that iteration plist will not work.
}
pages[count++] = temp;
+   }

-Original Message-
From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On Behalf Of 
He, Roger
Sent: Tuesday, December 05, 2017 10:01 AM
To: Christian König <ckoenig.leichtzumer...@gmail.com>; 
dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 2/2] drm/ttm: swap consecutive allocated pooled pages v3

Series is:
Reviewed-by: Roger He <hongbo...@amd.com>

Thanks
Roger(Hongbo.He)
-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Monday, December 04, 2017 8:46 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; He, Roger 
<hongbo...@amd.com>
Subject: [PATCH 2/2] drm/ttm: swap consecutive allocated pooled pages v3

When we detect consecutive allocation of pages swap them to avoid accidentally 
freeing them as huge page.

v2: use swap
v3: check if it's really the first allocated page

Signed-off-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 7c4d4edce0ba..da8a50f7c8fe 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -954,8 +954,13 @@ static int ttm_get_pages(struct page **pages, unsigned 
npages, int flags,
r = ttm_page_pool_get_pages(pool, , flags, cstate,
npages - count, 0);
 
-   list_for_each_entry(p, , lru)
+   first = count;
+   list_for_each_entry(p, , lru) {
+   /* Swap the pages if we detect consecutive order */
+   if (count > first && pages[count - 1] == p - 1)
+   swap(p, pages[count - 1]);
pages[count++] = p;
+   }
 
if (r) {
/* If there is any pages in the list put them back to
--
2.11.0

___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/ttm: swap consecutive allocated pooled pages v3

2017-12-04 Thread He, Roger
Series is:
Reviewed-by: Roger He <hongbo...@amd.com>

Thanks
Roger(Hongbo.He)
-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Monday, December 04, 2017 8:46 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; He, Roger 
<hongbo...@amd.com>
Subject: [PATCH 2/2] drm/ttm: swap consecutive allocated pooled pages v3

When we detect consecutive allocation of pages swap them to avoid accidentally 
freeing them as huge page.

v2: use swap
v3: check if it's really the first allocated page

Signed-off-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 7c4d4edce0ba..da8a50f7c8fe 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -954,8 +954,13 @@ static int ttm_get_pages(struct page **pages, unsigned 
npages, int flags,
r = ttm_page_pool_get_pages(pool, , flags, cstate,
npages - count, 0);
 
-   list_for_each_entry(p, , lru)
+   first = count;
+   list_for_each_entry(p, , lru) {
+   /* Swap the pages if we detect consecutive order */
+   if (count > first && pages[count - 1] == p - 1)
+   swap(p, pages[count - 1]);
pages[count++] = p;
+   }
 
if (r) {
/* If there is any pages in the list put them back to
--
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/2] drm/ttm: swap consecutive allocated cached pages v2

2017-12-04 Thread He, Roger
-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Monday, December 04, 2017 8:13 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; He, Roger 
<hongbo...@amd.com>; ckoenig.leichtzumer...@gmail.com
Subject: [PATCH 1/2] drm/ttm: swap consecutive allocated cached pages v2

When we detect consecutive allocation of pages swap them to avoid accidentally 
freeing them as huge page.

v2: use swap

Signed-off-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index c82d94cbbabc..b6c5148607e9 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -921,6 +921,10 @@ static int ttm_get_pages(struct page **pages, unsigned 
npages, int flags,
return -ENOMEM;
}
 
How about add first index as below?

first = i;  //init  first here
while (npages) {
p = alloc_page(gfp_flags);
if (!p) {
pr_debug("Unable to allocate page\n");
return -ENOMEM;
}

/* Swap the pages if we detect consecutive order */
if (i > first && pages[i - 1] == p - 1) 
 //change to (i > first), because I worry if npages is 
513, the first regular page is exactly consecutive with last page of huge page. 
Then swap them is not what I want here.
swap(p, pages[i - 1]);

pages[i++] = p;
--npages;
}

pages[i++] = p;
--npages;
}
--
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/amdgpu: set gtt size according to system memory size only

2017-11-28 Thread He, Roger
-Original Message-
From: Koenig, Christian 
Sent: Tuesday, November 28, 2017 6:01 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amd/amdgpu: set gtt size according to system memory 
size only

Am 28.11.2017 um 10:40 schrieb Roger He:
> Change-Id: Ib634375b90d875fe04a890fc82fb1e3b7112676a
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 16 +++-
>   1 file changed, 11 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 17bf0ce..d773c5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1328,13 +1328,19 @@ int amdgpu_ttm_init(struct amdgpu_device 
> *adev)
>   
>   if (amdgpu_gtt_size == -1) {
>   struct sysinfo si;
> + uint64_t sys_mem_size;
>   
>   si_meminfo();
> - gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
> -adev->mc.mc_vram_size),
> -((uint64_t)si.totalram * si.mem_unit * 3/4));
> - }
> - else
> + sys_mem_size = (uint64_t)si.totalram * si.mem_unit;
> + gtt_size = AMDGPU_DEFAULT_GTT_SIZE_MB << 20;
> +
> + /* leave 2GB for OS to work with */
> + if (sys_mem_size > (2ULL << 30)) {
> + gtt_size = max(gtt_size, sys_mem_size - (2ULL << 30));
> + } else

No need for the "{}" here.

> + DRM_INFO("amdgpu: Too small system memory %llu MB\n",
> + sys_mem_size >> 20);
> + } else

I have a preference to stick with the 75% rule similar to how TTM does things, 
but that isn't a hard opinion if you have a good argument.

[Roger]: originally I used the 75% rule as well. But for a special test case, 
test failed. Anyway, let's keep 75% here since seems it is more reasonable. And 
for the special test case, will use module parameter to change GTT size 
temporarily.


Thanks
Roger(Hongbo.He)




Regards,
Christian.

>   gtt_size = (uint64_t)amdgpu_gtt_size << 20;
>   r = ttm_bo_init_mm(>mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
>   if (r) {

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/5] drm/ttm: add page order support in ttm_pages_put

2017-11-22 Thread He, Roger
Hi Christian:

Maybe we can get back the logic for page order 0 in ttm_pages_put.

I mean: handle order 0 with set_pages_array_wb and handle order 9 with 
set_pages_wb.
If that, no performance concern.
How about that?

Thanks
Roger(Hongbo.He)
-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Wednesday, November 22, 2017 6:01 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 3/5] drm/ttm: add page order support in ttm_pages_put

That completely negates the advantage of setting write back on multiple pages 
at once.

In other words this way we wouldn't need the array any more at all and could 
remove the whole complicated handling.

I'm pretty close to saying just go ahead with that and even clean up the whole 
stuff with the static array, but I'm not sure if that doesn't result in some 
performance problems.

A possible alternative is the (untested) patch attached, this way we move the 
__free_page()/_free_pages() call out of ttm_pages_put and just need to add the 
correct number of pages to the array in the loop.

Regards,
Christian.

Am 22.11.2017 um 10:17 schrieb Roger He:
> Change-Id: Ia55b206d95812c5afcfd6cec29f580758d1f50f0
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_page_alloc.c | 19 +++
>   1 file changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
> b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> index 9b48b93..2dc83c0 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> @@ -299,13 +299,16 @@ static struct ttm_page_pool *ttm_get_pool(int flags, 
> bool huge,
>   }
>   
>   /* set memory back to wb and free the pages. */ -static void 
> ttm_pages_put(struct page *pages[], unsigned npages)
> +static void ttm_pages_put(struct page *pages[], unsigned npages,
> + unsigned int order)
>   {
> - unsigned i;
> - if (set_pages_array_wb(pages, npages))
> - pr_err("Failed to set %d pages to wb!\n", npages);
> - for (i = 0; i < npages; ++i)
> - __free_page(pages[i]);
> + unsigned int i, pages_nr = (1 << order);
> +
> + for (i = 0; i < npages; ++i) {
> + if (set_pages_wb(pages[i], pages_nr))
> + pr_err("Failed to set %d pages to wb!\n", pages_nr);
> + __free_pages(pages[i], order);
> + }
>   }
>   
>   static void ttm_pool_update_free_locked(struct ttm_page_pool *pool, 
> @@ -368,7 +371,7 @@ static int ttm_page_pool_free(struct ttm_page_pool *pool, 
> unsigned nr_free,
>*/
>   spin_unlock_irqrestore(>lock, irq_flags);
>   
> - ttm_pages_put(pages_to_free, freed_pages);
> + ttm_pages_put(pages_to_free, freed_pages, pool->order);
>   if (likely(nr_free != FREE_ALL_PAGES))
>   nr_free -= freed_pages;
>   
> @@ -403,7 +406,7 @@ static int ttm_page_pool_free(struct ttm_page_pool *pool, 
> unsigned nr_free,
>   spin_unlock_irqrestore(>lock, irq_flags);
>   
>   if (freed_pages)
> - ttm_pages_put(pages_to_free, freed_pages);
> + ttm_pages_put(pages_to_free, freed_pages, pool->order);
>   out:
>   if (pages_to_free != static_buf)
>   kfree(pages_to_free);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/4] drm/ttm: add page order support in ttm_pages_put

2017-11-22 Thread He, Roger
Hi Christian:

This is old patch, I already send new patches. And it is clean and simple.  
Please check that.

Thanks
Roger(Hongbo.He)

-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Wednesday, November 22, 2017 5:29 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Cc: Koenig, Christian <christian.koe...@amd.com>
Subject: Re: [PATCH 2/4] drm/ttm: add page order support in ttm_pages_put

I would rather completely nuke ttm_pages_put() instead of coming up with more 
workarounds here.

Going to provide a cleanup patch to show what I mean, just give me a minute.

Regards,
Christian.

Am 22.11.2017 um 06:36 schrieb Roger He:
> Change-Id: Ia55b206d95812c5afcfd6cec29f580758d1f50f0
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_page_alloc.c | 42 
> +---
>   1 file changed, 34 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
> b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> index 0a0c653..de64209 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> @@ -285,13 +285,39 @@ static struct ttm_page_pool *ttm_get_pool(int flags, 
> bool huge,
>   }
>   
>   /* set memory back to wb and free the pages. */ -static void 
> ttm_pages_put(struct page *pages[], unsigned npages)
> +static void ttm_pages_put(struct page *pages[], unsigned npages,
> + unsigned int order)
>   {
> - unsigned i;
> - if (set_pages_array_wb(pages, npages))
> - pr_err("Failed to set %d pages to wb!\n", npages);
> - for (i = 0; i < npages; ++i)
> - __free_page(pages[i]);
> + struct page **pages_to_free = NULL;
> + struct page **pages_array;
> + struct page *p;
> + unsigned int i, j, pages_nr = (1 << order);
> +
> + if (order > 0) {
> + pages_to_free = kmalloc_array(pages_nr, sizeof(struct page *),
> + GFP_KERNEL);
> + if (!pages_to_free) {
> + pr_err("Failed to allocate memory for ttm pages put 
> operation\n");
> + return;
> + }
> + }
> +
> + for (i = 0; i < npages; ++i) {
> + if (order > 0) {
> + p = pages[i];
> + for (j = 0; j < pages_nr; ++j)
> + pages_to_free[j] = p++;
> +
> + pages_array = pages_to_free;
> + } else
> + pages_array = pages;
> +
> + if (set_pages_array_wb(pages_array, pages_nr))
> + pr_err("Failed to set %d pages to wb!\n", pages_nr);
> + __free_pages(pages[i], order);
> + }
> +
> + kfree(pages_to_free);
>   }
>   
>   static void ttm_pool_update_free_locked(struct ttm_page_pool *pool, 
> @@ -354,7 +380,7 @@ static int ttm_page_pool_free(struct ttm_page_pool *pool, 
> unsigned nr_free,
>*/
>   spin_unlock_irqrestore(>lock, irq_flags);
>   
> - ttm_pages_put(pages_to_free, freed_pages);
> + ttm_pages_put(pages_to_free, freed_pages, pool->order);
>   if (likely(nr_free != FREE_ALL_PAGES))
>   nr_free -= freed_pages;
>   
> @@ -389,7 +415,7 @@ static int ttm_page_pool_free(struct ttm_page_pool *pool, 
> unsigned nr_free,
>   spin_unlock_irqrestore(>lock, irq_flags);
>   
>   if (freed_pages)
> - ttm_pages_put(pages_to_free, freed_pages);
> + ttm_pages_put(pages_to_free, freed_pages, pool->order);
>   out:
>   if (pages_to_free != static_buf)
>   kfree(pages_to_free);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/4] drm/ttm: add page order in page pool

2017-11-22 Thread He, Roger


-Original Message-
From: Koenig, Christian 
Sent: Wednesday, November 22, 2017 3:48 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 1/4] drm/ttm: add page order in page pool

Am 22.11.2017 um 06:36 schrieb Roger He:
> to indicate page order for each element in the pool
>
> Change-Id: Ic609925ca5d2a5d4ad49d6becf505388ce3624cf
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_page_alloc.c | 42 
> ++--
>   1 file changed, 31 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
> b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> index 72ea037..0a0c653 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> @@ -81,6 +81,7 @@ struct ttm_page_pool {
>   char*name;
>   unsigned long   nfrees;
>   unsigned long   nrefills;
> + unsigned intorder;
>   };
>   
>   /**
> @@ -412,6 +413,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct 
> shrink_control *sc)
>   struct ttm_page_pool *pool;
>   int shrink_pages = sc->nr_to_scan;
>   unsigned long freed = 0;
> + unsigned int nr_free_pool;
>   
>   if (!mutex_trylock())
>   return SHRINK_STOP;
> @@ -421,10 +423,15 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct 
> shrink_control *sc)
>   unsigned nr_free = shrink_pages;
>   if (shrink_pages == 0)
>   break;
> +
>   pool = &_manager->pools[(i + pool_offset)%NUM_POOLS];
>   /* OK to use static buffer since global mutex is held. */
> - shrink_pages = ttm_page_pool_free(pool, nr_free, true);
> - freed += nr_free - shrink_pages;
> + nr_free_pool = (nr_free >> pool->order);
> + if (nr_free_pool == 0)
> + continue;
> +
> + shrink_pages = ttm_page_pool_free(pool, nr_free_pool, true);
> + freed += ((nr_free_pool - shrink_pages) << pool->order);
>   }
>   mutex_unlock();
>   return freed;
> @@ -436,9 +443,12 @@ ttm_pool_shrink_count(struct shrinker *shrink, struct 
> shrink_control *sc)
>   {
>   unsigned i;
>   unsigned long count = 0;
> + struct ttm_page_pool *pool;
>   
> - for (i = 0; i < NUM_POOLS; ++i)
> - count += _manager->pools[i].npages;
> + for (i = 0; i < NUM_POOLS; ++i) {
> + pool = &_manager->pools[i];
> + count += (pool->npages << pool->order);
> + }
>   
>   return count;
>   }
> @@ -932,7 +942,7 @@ static int ttm_get_pages(struct page **pages, unsigned 
> npages, int flags,
>   }
>   
>   static void ttm_page_pool_init_locked(struct ttm_page_pool *pool, gfp_t 
> flags,
> - char *name)
> + char *name, unsigned int order)
>   {
>   spin_lock_init(>lock);
>   pool->fill_lock = false;
> @@ -940,8 +950,18 @@ static void ttm_page_pool_init_locked(struct 
> ttm_page_pool *pool, gfp_t flags,
>   pool->npages = pool->nfrees = 0;
>   pool->gfp_flags = flags;
>   pool->name = name;
> + pool->order = order;
>   }
>   
> +/**
> + * Actually if TRANSPARENT_HUGEPAGE not enabled, we will not use
> + * wc_pool_huge and uc_pool_huge, so no matter whatever the page
> + * order are for those two pools
> + */
> +#ifndef CONFIG_TRANSPARENT_HUGEPAGE
> +#define  HPAGE_PMD_ORDER 9
> +#endif
> +

That still won't work and sorry I wasn't 100% clear in the last mail.

When CONFIG_TRANSPARENT_HUGEPAGE isn't set HPAGE_PMD_ORDER is defined as 
BUILD_BUG().

So you will still run into problems when that config option isn't set.

>   int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
>   {
>   int ret;

I suggest to just handle it here like this

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
     unsigned order = HPAGE_PMD_ORDER;
#else
     unsigned order = 0;
#endif

Apart from that the patch looks good to me, Christian.


Ok, going to modify it. Thanks!

Thanks
Roger(Hongbo.He)

> @@ -952,23 +972,23 @@ int ttm_page_alloc_init(struct ttm_mem_global 
> *glob, unsigned max_pages)
>   
>   _manager = kzalloc(sizeof(*_manager), GFP_KERNEL);
>   
> - ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc");
> + ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc", 
> +0);
>   
> - ttm_page_pool_init_locked(&_manager->uc_pool, GFP_HIGHUSER, "uc"

RE: 答复: [PATCH] drm/amd/amdgpu: fix over-bound accessing in amdgpu_cs_wait_any_fence

2017-11-16 Thread He, Roger
Theoretically, if first < fence_count, array[first] will not be NULL.

Hi Emily:

do you remember the  issue you fixed has same error log?


Thanks
Roger(Hongbo.He)
-Original Message-
From: Zhou, David(ChunMing) 
Sent: Friday, November 17, 2017 1:24 PM
To: Qu, Jim <jim...@amd.com>; He, Roger <hongbo...@amd.com>; 
amd-gfx@lists.freedesktop.org
Cc: Zhou, David(ChunMing) <david1.z...@amd.com>; Koenig, Christian 
<christian.koe...@amd.com>
Subject: Re: 答复: [PATCH] drm/amd/amdgpu: fix over-bound accessing in 
amdgpu_cs_wait_any_fence

Yes,  As Jim pointed out, you lacks the array[] checking.

you can just change to if (first < fence_count && array[first]), otherwise it's 
a good fix for regression.


Regards,

David Zhou


On 2017年11月17日 13:16, Qu, Jim wrote:
> Hi Roger:
>
> -   if (array[first])
> -   r = array[first]->error;
> -   else
> +   if (first == ~0)
>  r = 0;
> +   else
> +   r = array[first]->error;
>
> // The patch looks like change original logic that miss to check array[first].
>
> Thanks
> JimQu
>
> 
> 发件人: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> 代表 Roger He 
> <hongbo...@amd.com>
> 发送时间: 2017年11月17日 13:04
> 收件人: amd-gfx@lists.freedesktop.org
> 抄送: Zhou, David(ChunMing); He, Roger; Koenig, Christian
> 主题: [PATCH] drm/amd/amdgpu: fix over-bound accessing in 
> amdgpu_cs_wait_any_fence
>
> fix the following issue:
>
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.712090] Oops:  [#2] SMP
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.712481] Modules linked in: 
> amdgpu(OE) chash ttm(OE) drm_kms_helper(OE) drm(OE) i2c_algo_bit fb_sys_fops 
> syscopyarea sysfillrect sysimgblt intel_rapl snd_hda_codec_realtek 
> snd_hda_codec_generic x86_pkg_temp_thermal intel_powerclamp 
> snd_hda_codec_hdmi coretemp snd_hda_intel snd_hda_codec snd_hda_core 
> snd_hwdep snd_pcm kvm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq 
> irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc 
> snd_seq_device snd_timer aesni_intel snd mei_me mei aes_x86_64 crypto_simd 
> serio_raw eeepc_wmi glue_helper asus_wmi sparse_keymap cryptd soundcore 
> shpchp wmi_bmof lpc_ich mac_hid tpm_infineon nfsd auth_rpcgss nfs_acl lockd 
> parport_pc grace ppdev sunrpc lp parport autofs4 hid_generic usbhid ahci 
> mxm_wmi r8169 libahci hid mii wmi video
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.715120] CPU: 1 PID: 1330 Comm: 
> deqp-vk Tainted: G  DOE   4.13.0-custom #1
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.715879] Hardware name: ASUS 
> All Series/Z87-A, BIOS 1802 01/28/2014
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.716658] task: 9b7115728000 
> task.stack: b178016e
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.717494] RIP: 
> 0010:amdgpu_cs_wait_fences_ioctl+0x20b/0x2e0 [amdgpu]
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.718312] RSP: 
> 0018:b178016e3cb0 EFLAGS: 00010246
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.719270] RAX:  
> RBX: b178016e3d90 RCX: 
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.720247] RDX:  
> RSI: 0001 RDI: 9b7116a1d8a8
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.721246] RBP: b178016e3d00 
> R08:  R09: 
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.722262] R10: ed00 
> R11: b178016e3d90 R12: 9b7116a1d8a8
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.723299] R13: 9b7000707020 
> R14: 0001 R15: 
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.724358] FS:  
> 7f89f3af4740() GS:9b712ec8() knlGS:
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.725447] CS:  0010 DS:  ES: 
>  CR0: 80050033
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.726550] CR2: 9b7916a1d8a0 
> CR3: 00022042e000 CR4: 001406e0
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.727687] DR0:  
> DR1:  DR2: 
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.728837] DR3:  
> DR6: fffe0ff0 DR7: 0400
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.729992] Call Trace:
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.731193]  ? 
> amdgpu_cs_fence_to_handle_ioctl+0x1c0/0x1c0 [amdgpu]
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.732406]  
> drm_ioctl_kernel+0x69/0xb0 [drm]
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.733626]  drm_ioctl+0x2d2/0x390 
> [drm]
> Nov 15 17:40:25 jenkins-MS-7984 kernel: [  146.734883]  ? 
> amdgpu_cs_fence_to_handle_ioctl+0x1c0/0x1

RE: [PATCH 1/2] drm/ttm: fix ttm_mem_evict_first once more

2017-11-15 Thread He, Roger
Reviewed-by: Roger He 

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Wednesday, November 15, 2017 8:32 PM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Subject: [PATCH 1/2] drm/ttm: fix ttm_mem_evict_first once more

The code path isn't hit at the moment, but we need to take the lock to add the 
BO back to the LRU.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
07d9c6e5b6ca..7c1eac4f4b4b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -793,10 +793,13 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
spin_unlock(>lru_lock);
 
ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
-   if (locked)
+   if (locked) {
ttm_bo_unreserve(bo);
-   else
+   } else {
+   spin_lock(>lru_lock);
ttm_bo_add_to_lru(bo);
+   spin_unlock(>lru_lock);
+   }
 
kref_put(>list_kref, ttm_bo_release_list);
return ret;
--
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/amdgpu: not allow gtt size exceed system memory size

2017-11-10 Thread He, Roger


-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Friday, November 10, 2017 8:02 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amd/amdgpu: not allow gtt size exceed system memory 
size

Am 10.11.2017 um 12:33 schrieb Roger He:
> since sometimes VRAM size is bigger than system memory
>
> Change-Id: I5b14d18ed7a9f79810cc50c023ac9e240bddf101
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 11 ---
>   1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index cdbdf67..f103ccc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1330,9 +1330,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>   DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
>(unsigned) (adev->mc.real_vram_size / (1024 * 1024)));
>   
> - if (amdgpu_gtt_size == -1)
> - gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
> -adev->mc.mc_vram_size);
> + if (amdgpu_gtt_size == -1) {
> + struct sysinfo si;
> +
> + si_meminfo();
> + gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
> +adev->mc.mc_vram_size),
> +((uint64_t)si.totalram * si.mem_unit));

TTM starts to try to swap things out when more than 50% of system memory are 
used, 75% is the default hard limit where it starts to block new allocations.

So I would go with 75% here as well and code this as:

gtt_size = AMDGPU_DEFAULT_GTT_SIZE_MB << 20; gtt_size = max(gtt_size, 
adev->mc.mc_vram_size); gtt_size = min(gtt_size, ((uint64_t)si.totalram * 
si.mem_unit) * 75 / 100);

[Roger]: but in amdgpu_info_ioctl, for GTT size, reported to UMD already is *3/4
case AMDGPU_INFO_MEMORY: {

mem.gtt.heap_usage =
amdgpu_gtt_mgr_usage(>mman.bdev.man[TTM_PL_TT]);
mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;

So is it duplicated  if *75/100 here?


Regards,
Christian.

> + 


> + }
>   else
>   gtt_size = (uint64_t)amdgpu_gtt_size << 20;
>   r = ttm_bo_init_mm(>mman.bdev, TTM_PL_TT, gtt_size >> 
> PAGE_SHIFT);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/4] drm/ttm: make unlocking in ttm_bo_cleanup_refs optional

2017-11-10 Thread He, Roger
Please see comments inline

-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Wednesday, November 08, 2017 11:00 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: [PATCH 3/4] drm/ttm: make unlocking in ttm_bo_cleanup_refs optional

Needed for the next patch.

Signed-off-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 52 
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
6f55310a9d09..d23592cfe42e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -486,20 +486,21 @@ static void ttm_bo_cleanup_refs_or_queue(struct 
ttm_buffer_object *bo)  }
 
 /**
- * function ttm_bo_cleanup_refs_and_unlock
+ * function ttm_bo_cleanup_refs
  * If bo idle, remove from delayed- and lru lists, and unref.
  * If not idle, do nothing.
  *
  * Must be called with lru_lock and reservation held, this function
- * will drop both before returning.
+ * will drop the lru lock and optionally the reservation lock before returning.
  *
  * @interruptible Any sleeps should occur interruptibly.
  * @no_wait_gpu   Never wait for gpu. Return -EBUSY instead.
+ * @unlock_resv   Unlock the reservation lock as well.
  */
 
-static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
- bool interruptible,
- bool no_wait_gpu)
+static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
+  bool interruptible, bool no_wait_gpu,
+  bool unlock_resv)
 {
struct ttm_bo_global *glob = bo->glob;
struct reservation_object *resv;
@@ -518,7 +519,8 @@ static int ttm_bo_cleanup_refs_and_unlock(struct 
ttm_buffer_object *bo,
if (ret && !no_wait_gpu) {
long lret;
 
-   reservation_object_unlock(bo->resv);
+   if (unlock_resv)
+   reservation_object_unlock(bo->resv);
spin_unlock(>lru_lock);
 
lret = reservation_object_wait_timeout_rcu(resv, true, @@ 
-531,19 +533,22 @@ static int ttm_bo_cleanup_refs_and_unlock(struct 
ttm_buffer_object *bo,
return -EBUSY;
 
spin_lock(>lru_lock);
-   ret = __ttm_bo_reserve(bo, false, true, NULL);
-
-   /*
-* We raced, and lost, someone else holds the reservation now,
-* and is probably busy in ttm_bo_cleanup_memtype_use.
-*
-* Even if it's not the case, because we finished waiting any
-* delayed destruction would succeed, so just return success
-* here.
-*/
-   if (ret) {
-   spin_unlock(>lru_lock);
-   return 0;
+   if (unlock_resv) {
+   ret = __ttm_bo_reserve(bo, false, true, NULL);
+   /*
+* We raced, and lost, someone else holds the 
reservation now,
+* and is probably busy in ttm_bo_cleanup_memtype_use.
+*
+* Even if it's not the case, because we finished 
waiting any
+* delayed destruction would succeed, so just return 
success
+* here.
+*/
+   if (ret) {
+   spin_unlock(>lru_lock);
+   return 0;
+   }
+   } else {
+   ret = 0;
}
}

[Roger]: 
//Looks like we also need the condition check here.  Otherwise, it 
will unlock the vm root bo reservation that is not what we want here I think.
if (unlock_resv) //need this condition here
reservation_object_unlock(bo->resv);

return 0;
}

 
@@ -600,8 +605,8 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device 
*bdev, bool remove_all)
}
 
if (!ret)
-   ret = ttm_bo_cleanup_refs_and_unlock(entry, false,
-!remove_all);
+   ret = ttm_bo_cleanup_refs(entry, false, !remove_all,
+ true);
else
spin_unlock(>lru_lock);
 
@@ -770,8 +775,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
kref_get(>list_kref);
 
if (!list_empty(>ddestroy)) {
-   ret = ttm_bo_cleanup_refs_and_unlock(bo, interruptible,
-no_wait_gpu);
+ 

RE: [PATCH 6/7] drm/ttm: make unlocking in ttm_bo_cleanup_refs optional

2017-11-10 Thread He, Roger
static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
   bool interruptible, bool no_wait_gpu,
   bool unlock_resv)
{
..
ttm_bo_del_from_lru(bo);
list_del_init(>ddestroy);
kref_put(>list_kref, ttm_bo_ref_bug);

spin_unlock(>lru_lock);
ttm_bo_cleanup_memtype_use(bo);

if (unlock_resv)
  //[Roger]: not sure we should add condition here as well. If not, for 
per-vm-bo, eviction would unlock the VM root BO resv obj which is not we want I 
think.
reservation_object_unlock(bo->resv);

return 0;
}


Thanks
Roger(Hongbo.He)

-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: Thursday, November 09, 2017 4:59 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; He, Roger 
<hongbo...@amd.com>; mic...@daenzer.net
Subject: [PATCH 6/7] drm/ttm: make unlocking in ttm_bo_cleanup_refs optional

Needed for the next patch.

Signed-off-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 52 
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
6f5d18366e6e..50a678b504f3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -486,20 +486,21 @@ static void ttm_bo_cleanup_refs_or_queue(struct 
ttm_buffer_object *bo)  }
 
 /**
- * function ttm_bo_cleanup_refs_and_unlock
+ * function ttm_bo_cleanup_refs
  * If bo idle, remove from delayed- and lru lists, and unref.
  * If not idle, do nothing.
  *
  * Must be called with lru_lock and reservation held, this function
- * will drop both before returning.
+ * will drop the lru lock and optionally the reservation lock before returning.
  *
  * @interruptible Any sleeps should occur interruptibly.
  * @no_wait_gpu   Never wait for gpu. Return -EBUSY instead.
+ * @unlock_resv   Unlock the reservation lock as well.
  */
 
-static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
- bool interruptible,
- bool no_wait_gpu)
+static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
+  bool interruptible, bool no_wait_gpu,
+  bool unlock_resv)
 {
struct ttm_bo_global *glob = bo->glob;
struct reservation_object *resv;
@@ -518,7 +519,8 @@ static int ttm_bo_cleanup_refs_and_unlock(struct 
ttm_buffer_object *bo,
if (ret && !no_wait_gpu) {
long lret;
 
-   reservation_object_unlock(bo->resv);
+   if (unlock_resv)
+   reservation_object_unlock(bo->resv);
spin_unlock(>lru_lock);
 
lret = reservation_object_wait_timeout_rcu(resv, true, @@ 
-531,19 +533,22 @@ static int ttm_bo_cleanup_refs_and_unlock(struct 
ttm_buffer_object *bo,
return -EBUSY;
 
spin_lock(>lru_lock);
-   ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
-
-   /*
-* We raced, and lost, someone else holds the reservation now,
-* and is probably busy in ttm_bo_cleanup_memtype_use.
-*
-* Even if it's not the case, because we finished waiting any
-* delayed destruction would succeed, so just return success
-* here.
-*/
-   if (ret) {
-   spin_unlock(>lru_lock);
-   return 0;
+   if (unlock_resv) {
+   ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
+   /*
+* We raced, and lost, someone else holds the 
reservation now,
+* and is probably busy in ttm_bo_cleanup_memtype_use.
+*
+* Even if it's not the case, because we finished 
waiting any
+* delayed destruction would succeed, so just return 
success
+* here.
+*/
+   if (ret) {
+   spin_unlock(>lru_lock);
+   return 0;
+   }
+   } else {
+   ret = 0;
}
}
 
@@ -600,8 +605,8 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device 
*bdev, bool remove_all)
}
 
if (!ret)
-   ret = ttm_bo_cleanup_refs_and_unlock(entry, false,
-!remove_all);
+   

Recall: [PATCH 3/4] drm/ttm: make unlocking in ttm_bo_cleanup_refs optional

2017-11-10 Thread He, Roger
He, Roger would like to recall the message, "[PATCH 3/4] drm/ttm: make 
unlocking in ttm_bo_cleanup_refs optional".
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Recall: [PATCH 3/4] drm/ttm: make unlocking in ttm_bo_cleanup_refs optional

2017-11-10 Thread He, Roger
He, Roger would like to recall the message, "[PATCH 3/4] drm/ttm: make 
unlocking in ttm_bo_cleanup_refs optional".
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Recall: [PATCH 3/4] drm/ttm: make unlocking in ttm_bo_cleanup_refs optional

2017-11-10 Thread He, Roger
He, Roger would like to recall the message, "[PATCH 3/4] drm/ttm: make 
unlocking in ttm_bo_cleanup_refs optional".
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/ttm: set bo->resv point to tbo->ttm_resv after individualize_resv

2017-11-07 Thread He, Roger
I guess this because you move the resv changing out of lock of bo->resv.
Because at the beginning ttm_mem_evict_first may __ttm_bo_reserve(bo->resv) 
success, and then bo->resv has been changed by another thread. That is not 
matched and at this point bo->ttm_resv also may been freed already.

 And I think it is not easy to put it out of two lock of bo->resv and 
bo->ttm_resv.

Thanks
Roger(Hongbo.He)
-Original Message-
From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On Behalf Of 
Michel Dänzer
Sent: Wednesday, November 08, 2017 12:16 AM
To: Christian König 
Cc: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/ttm: set bo->resv point to tbo->ttm_resv after 
individualize_resv

On 07/11/17 02:44 PM, Christian König wrote:
> Set bo->resv to ttm_resv during BO cleanup. This way freed BOs can be 
> better reaped during eviction.
> 
> Signed-off-by: Roger He 
> Signed-off-by: Christian König 

KASAN caught some badness while running piglit with this applied, see the 
attached dmesg excerpts.


At least some of this might be pre-existing bugs being exposed by this change. 
E.g. I've been chasing another use-after-free, with ttm_bo_delayed_delete 
trying to reserve a BO which has already been destroyed. Looks like maybe the 
ddestroy list handling isn't quite watertight yet.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: use 2MB fragment size for GFX6,7 and 8

2017-09-18 Thread He, Roger
Reviewed-by: Roger He 

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Monday, September 18, 2017 8:34 PM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH] drm/amdgpu: use 2MB fragment size for GFX6,7 and 8

From: Christian König 

Use 2MB fragment size by default for older hardware generations as well.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 2 +-  
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 2 +-  
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 5be9c83..2d1f3f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -831,7 +831,7 @@ static int gmc_v6_0_sw_init(void *handle)
if (r)
return r;
 
-   amdgpu_vm_adjust_size(adev, 64, 4);
+   amdgpu_vm_adjust_size(adev, 64, 9);
adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
 
adev->mc.mc_mask = 0xffULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index eace9e7..2256277 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -970,7 +970,7 @@ static int gmc_v7_0_sw_init(void *handle)
 * Currently set to 4GB ((1 << 20) 4k pages).
 * Max GPUVM size for cayman and SI is 40 bits.
 */
-   amdgpu_vm_adjust_size(adev, 64, 4);
+   amdgpu_vm_adjust_size(adev, 64, 9);
adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
 
/* Set the internal MC address mask
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 3b3326d..114671b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1067,7 +1067,7 @@ static int gmc_v8_0_sw_init(void *handle)
 * Currently set to 4GB ((1 << 20) 4k pages).
 * Max GPUVM size for cayman and SI is 40 bits.
 */
-   amdgpu_vm_adjust_size(adev, 64, 4);
+   amdgpu_vm_adjust_size(adev, 64, 9);
adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
 
/* Set the internal MC address mask
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: fix VM sync with always valid BOs v2

2017-09-11 Thread He, Roger
Reviewed-by: Roger He 

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian K?nig
Sent: Monday, September 11, 2017 6:53 PM
To: Zhou, David(ChunMing) ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: fix VM sync with always valid BOs v2

Am 11.09.2017 um 11:10 schrieb zhoucm1:
>
>
> On 2017年09月11日 17:04, Christian König wrote:
>>> NAK, as Roger pointed, the performance of Vulkan Dota2 drops from 
>>> 72fps to 24fps.
>>>
>>> This patches will kill Per-vm-BO advantages, even drop to 1/3 of 
>>> previous non-per-vm-bo.
>>
>> This is irrelevant and actually a foreseen consequence of per VM BOs.
>>
>> If the performance drops is in-acceptable then the UMD shouldn't use 
>> this feature.
> I got your mean, if UMD doesn't use this feature, then all BOs should 
> be kernel only, then this change should only be limited to kernel VM 
> BOs, not include UMD BOs.
> With this limitation, I think the change also can fix your issue.

Correct, yes. Can I get your rb or ab now? We need to get this fixed asap.

Thanks,
Christian.

>
> Regards,
> David Zhou
>>
>>> all moved and relocated fence have already synced, we just need to 
>>> catch the va mapping but which is CS itself required, as my proposal 
>>> in another thread, we maybe need to expand va_ioctl to return 
>>> mapping fence to UMD, then UMD passes it to CS as dependency.
>>
>> That can be an addition to the existing handling, but first of all 
>> the current state must be corrected.
>>
>> There are at least two bug reports on the open driver fixed by this, 
>> so please review.
>>
>> Thanks,
>> Christian.
>>
>> Am 11.09.2017 um 10:59 schrieb zhoucm1:
>>> NAK, as Roger pointed, the performance of Vulkan Dota2 drops from 
>>> 72fps to 24fps.
>>>
>>> This patches will kill Per-vm-BO advantages, even drop to 1/3 of 
>>> previous non-per-vm-bo.
>>>
>>> all moved and relocated fence have already synced, we just need to 
>>> catch the va mapping but which is CS itself required, as my proposal 
>>> in another thread, we maybe need to expand va_ioctl to return 
>>> mapping fence to UMD, then UMD passes it to CS as dependency.
>>>
>>>
>>> Regards,
>>>
>>> David Zhou
>>>
>>>
>>> On 2017年09月11日 15:53, Christian König wrote:
 From: Christian König 

 All users of a VM must always wait for updates with always valid 
 BOs to be completed.

 v2: remove debugging leftovers, rename struct member

 Signed-off-by: Christian König 
 ---
   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 ++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
   3 files changed, 17 insertions(+), 10 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
 index 8aa37e0..4681dcc 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
 @@ -752,10 +752,6 @@ static int amdgpu_bo_vm_update_pte(struct 
 amdgpu_cs_parser *p)
   if (r)
   return r;
   -r = amdgpu_sync_fence(adev, >job->sync, 
 vm->last_dir_update);
 -if (r)
 -return r;
 -
   r = amdgpu_vm_clear_freed(adev, vm, NULL);
   if (r)
   return r;
 @@ -810,6 +806,12 @@ static int amdgpu_bo_vm_update_pte(struct 
 amdgpu_cs_parser *p)
   }
 r = amdgpu_vm_handle_moved(adev, vm, >job->sync);
 +if (r)
 +return r;
 +
 +r = amdgpu_sync_fence(adev, >job->sync, vm->last_update);
 +if (r)
 +return r;
 if (amdgpu_vm_debug && p->bo_list) {
   /* Invalidate all BOs to test for userspace bugs */ diff 
 --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 index 55f1ecb..5042f09 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 @@ -1140,9 +1140,8 @@ static int amdgpu_vm_update_level(struct 
 amdgpu_device *adev,
   goto error_free;
 amdgpu_bo_fence(parent->base.bo, fence, true);
 -dma_fence_put(vm->last_dir_update);
 -vm->last_dir_update = dma_fence_get(fence);
 -dma_fence_put(fence);
 +dma_fence_put(vm->last_update);
 +vm->last_update = fence;
   }
   }
   @@ -1803,6 +1802,12 @@ int amdgpu_vm_bo_update(struct 
 amdgpu_device *adev,
   trace_amdgpu_vm_bo_mapping(mapping);
   }
   +if (bo_va->base.bo &&
 +bo_va->base.bo->tbo.resv == vm->root.base.bo->tbo.resv) {
 +dma_fence_put(vm->last_update);
 +vm->last_update = 

RE: [PATCH] drm/amdgpu: handle all fragment sizes v4

2017-08-31 Thread He, Roger
It works very well!
Tested-by: Roger He 

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Thursday, August 31, 2017 5:45 PM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH] drm/amdgpu: handle all fragment sizes v4

From: Roger He 

This can improve performance for some cases.

v2 (chk): handle all sizes, simplify the patch quite a bit
v3 (chk): adjust dw estimation as well
v4 (chk): use single loop, make end mask 64bit

Signed-off-by: Roger He 
Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 --
 1 file changed, 26 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0379af1..4c09338 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct 
amdgpu_pte_update_params*params,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags)
 {
-   int r;
-
/**
 * The MC L1 TLB supports variable sized pages, based on a fragment
 * field in the PTE. When this field is set to a non-zero value, page
@@ -1435,39 +1433,38 @@ static int amdgpu_vm_frag_ptes(struct 
amdgpu_pte_update_params  *params,
 * Userspace can support this by aligning virtual base address and
 * allocation size to the fragment size.
 */
-   unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-   uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-   uint64_t frag_align = 1 << pages_per_frag;
-
-   uint64_t frag_start = ALIGN(start, frag_align);
-   uint64_t frag_end = end & ~(frag_align - 1);
+   unsigned max_frag = params->adev->vm_manager.fragment_size;
+   int r;
 
/* system pages are non continuously */
-   if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-   (frag_start >= frag_end))
+   if (params->src || !(flags & AMDGPU_PTE_VALID))
return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
-   /* handle the 4K area at the beginning */
-   if (start != frag_start) {
-   r = amdgpu_vm_update_ptes(params, start, frag_start,
- dst, flags);
+   while (start != end) {
+   uint64_t frag_flags, frag_end;
+   unsigned frag;
+
+   /* This intentionally wraps around if no bit is set */
+   frag = min((unsigned)ffs(start) - 1,
+  (unsigned)fls64(end - start) - 1);
+   if (frag >= max_frag) {
+   frag_flags = AMDGPU_PTE_FRAG(max_frag);
+   frag_end = end & ~((1ULL << max_frag) - 1);
+   } else {
+   frag_flags = AMDGPU_PTE_FRAG(frag);
+   frag_end = start + (1 << frag);
+   }
+
+   r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+ flags | frag_flags);
if (r)
return r;
-   dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
-   }
-
-   /* handle the area in the middle */
-   r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
- flags | frag_flags);
-   if (r)
-   return r;
 
-   /* handle the 4K area at the end */
-   if (frag_end != end) {
-   dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-   r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+   dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+   start = frag_end;
}
-   return r;
+
+   return 0;
 }
 
 /**
@@ -1557,8 +1554,8 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
/* set page commands needed */
ndw += ncmds * 10;
 
-   /* two extra commands for begin/end of fragment */
-   ndw += 2 * 10;
+   /* extra commands for begin/end fragments */
+   ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
params.func = amdgpu_vm_do_set_ptes;
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: fix new PD update code for Vega10 v2

2017-08-31 Thread He, Roger
Reviewed-and-Tested-by: Roger He 

Thanks
Roger(Hongbo.He)

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Thursday, August 31, 2017 7:58 PM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH] drm/amdgpu: fix new PD update code for Vega10 v2

From: Christian König 

We need to refer to the parent instead of the root BO for multi level page 
tables on Vega10. Also don't set the PDE_PTE bit.

v2: Don't set the PDE_PTE bit either.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b08f031..4c678c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -308,7 +308,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device 
*adev,
/* Keep a reference to the root directory to avoid
* freeing them up in the wrong order.
*/
-   pt->parent = amdgpu_bo_ref(vm->root.base.bo);
+   pt->parent = amdgpu_bo_ref(parent->base.bo);
 
entry->base.vm = vm;
entry->base.bo = pt;
@@ -316,7 +316,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device 
*adev,
spin_lock(>status_lock);
list_add(>base.vm_status, >relocated);
spin_unlock(>status_lock);
-   entry->addr = ~0ULL;
+   entry->addr = 0;
}
 
if (level < adev->vm_manager.num_level) {
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/amdgpu: cover fragment size between 4 and 9 when not aligned

2017-08-30 Thread He, Roger
Additional to that the patch needs to be simplified, cause another 5 level 
recursion is not something we want because of the limited kernel stack (only 4K 
usually).
[Roger]: 
fragment = min(fragment, max(0, fls64(end - start) - 1))
so the next  level fragment which depends on the (end-start), and not always 5 
level recursion.
5 level is the worst case I think.

If you don't mind Roger I'm going to fix this today, already have a good idea 
how to handle it I think.
[Roger]: ok, please go ahead.

Thanks
Roger(Hongbo.He)
-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de] 
Sent: Wednesday, August 30, 2017 2:25 PM
To: Zhou, David(ChunMing) <david1.z...@amd.com>; He, Roger <hongbo...@amd.com>; 
amd-gfx@lists.freedesktop.org
Cc: Koenig, Christian <christian.koe...@amd.com>
Subject: Re: [PATCH] drm/amd/amdgpu: cover fragment size between 4 and 9 when 
not aligned

Hi David & Roger,

> I think when you can select fragment automatically, you shouldn't 
> involve the vm_manager.fragment_size to calculation, then we can use 
> the properest  fragment for every segment and get the best performance.
No, that won't work. I've already tried this and it decreases performance. The 
problem is that the L2 on pre Vega10 uses the fragment field to decide in which 
cache segment to put the PTE.

So we should only cover fragment sizes up to whatever the 
vm_manager.fragment_size configuration is currently.

Additional to that the patch needs to be simplified, cause another 5 level 
recursion is not something we want because of the limited kernel stack (only 4K 
usually).

If you don't mind Roger I'm going to fix this today, already have a good idea 
how to handle it I think.

Regards,
Christian.

Am 30.08.2017 um 08:15 schrieb zhoucm1:
> Hi Roger,
>
> I think when you can select fragment automatically, you shouldn't 
> involve the vm_manager.fragment_size to calculation, then we can use 
> the properest  fragment for every segment and get the best performance.
>
> So vm_manager.fragment_size should be always be -1, if it becomes 
> valid fragment value, then we should always use it and disable 
> automatic selection, which should only for validation usage.
>
>
> Regards,
>
> David Zhou
>
>
> On 2017年08月30日 13:01, Roger He wrote:
>> this can get performance improvement for some cases
>>
>> Change-Id: Ibb58bb3099f7e8c4b5da90da73a03544cdb2bcb7
>> Signed-off-by: Roger He <hongbo...@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 98
>> +++---
>>   1 file changed, 79 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 592c3e7..4e5da5e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1375,7 +1375,7 @@ static int amdgpu_vm_update_ptes(struct 
>> amdgpu_pte_update_params *params,
>>   }
>> /*
>> - * amdgpu_vm_frag_ptes - add fragment information to PTEs
>> + * amdgpu_vm_update_ptes_helper - add fragment information to PTEs
>>*
>>* @params: see amdgpu_pte_update_params definition
>>* @vm: requested vm
>> @@ -1383,11 +1383,12 @@ static int amdgpu_vm_update_ptes(struct 
>> amdgpu_pte_update_params *params,
>>* @end: last PTE to handle
>>* @dst: addr those PTEs should point to
>>* @flags: hw mapping flags
>> + * @fragment: fragment size
>>* Returns 0 for success, -EINVAL for failure.
>>*/
>> -static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params
>> *params,
>> -uint64_t start, uint64_t end,
>> -uint64_t dst, uint64_t flags)
>> +static int amdgpu_vm_update_ptes_helper(struct
>> amdgpu_pte_update_params *params,
>> +  uint64_t start, uint64_t end, uint64_t dst,
>> +  uint64_t flags, int fragment)
>>   {
>>   int r;
>>   @@ -1409,41 +1410,100 @@ static int amdgpu_vm_frag_ptes(struct 
>> amdgpu_pte_update_params*params,
>>* Userspace can support this by aligning virtual base address and
>>* allocation size to the fragment size.
>>*/
>> -unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
>> -uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
>> -uint64_t frag_align = 1 << pages_per_frag;
>> +uint64_t frag_flags, frag_align, frag_start, frag_end;
>>   -uint64_t frag_start = ALIGN(start, frag_align);
>> -uint64_t frag_end = end & ~(frag_align - 1);
>> +if (start > end || fragment < 0)
>> +

RE: [PATCH] drm/amd/amdgpu: fix BANK_SELECT on Vega10

2017-08-25 Thread He, Roger
Ok. 

Thanks
Roger(Hongbo.He)
-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de] 
Sent: Friday, August 25, 2017 3:33 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Koenig, Christian <christian.koe...@amd.com>
Subject: Re: [PATCH] drm/amd/amdgpu: fix BANK_SELECT on Vega10

Hi Roger,

there are a few warnings introduced by this commit:
> drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c: In function
> ‘gfxhub_v1_0_init_cache_regs’:
> drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c:127:16: warning: unused 
> variable ‘field’ [-Wunused-variable]
>   uint32_t tmp, field;
> ^
> drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c: In function
> ‘mmhub_v1_0_init_cache_regs’:
> drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c:141:16: warning: unused 
> variable ‘field’ [-Wunused-variable]
>   uint32_t tmp, field;
> ^

Please provide a fix.

Thanks,
Christian.

Am 24.08.2017 um 10:19 schrieb Christian König:
> Am 24.08.2017 um 09:07 schrieb Roger He:
>> BANK_SELECT should always be FRAGMENT_SIZE + 3 due to 8-entry (2^3) 
>> per cache line in L2 TLB for Vega10.
>>
>> Change-Id: I8cfcff197e2c571c1a547aaed959e492b4a6fe0e
>> Signed-off-by: Roger He <hongbo...@amd.com>
>
> Reviewed-by: Christian König <christian.koe...@amd.com>
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 3 +--
>>   drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 3 +--
>>   2 files changed, 2 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> index 4f2788b..a7351ba 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> @@ -143,9 +143,8 @@ static void gfxhub_v1_0_init_cache_regs(struct
>> amdgpu_device *adev)
>>   tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
>>   WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
>>   -field = adev->vm_manager.fragment_size;
>>   tmp = mmVM_L2_CNTL3_DEFAULT;
>> -tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
>> +tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>>   tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, 
>> L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>>   WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> index 4395a4f..2a6fa73 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> @@ -157,9 +157,8 @@ static void mmhub_v1_0_init_cache_regs(struct 
>> amdgpu_device *adev)
>>   tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
>>   WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
>>   -field = adev->vm_manager.fragment_size;
>>   tmp = mmVM_L2_CNTL3_DEFAULT;
>> -tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
>> +tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>>   tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, 
>> L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>>   WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
>
>

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/3] drm/amdgpu: inline amdgpu_ttm_do_bind again

2017-08-23 Thread He, Roger
Reviewed-by: Roger He 

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Wednesday, August 23, 2017 5:03 PM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 3/3] drm/amdgpu: inline amdgpu_ttm_do_bind again

From: Christian König 

The function is called only once and doesn't do anything special.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 45 +
 1 file changed, 18 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index aa0b7f5..b91a888 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -746,35 +746,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt 
*ttm)
sg_free_table(ttm->sg);
 }
 
-static int amdgpu_ttm_do_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem) -{
-   struct amdgpu_ttm_tt *gtt = (void *)ttm;
-   uint64_t flags;
-   int r;
-
-   spin_lock(>adev->gtt_list_lock);
-   flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, mem);
-   gtt->offset = (u64)mem->start << PAGE_SHIFT;
-   r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
-   ttm->pages, gtt->ttm.dma_address, flags);
-
-   if (r) {
-   DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
- ttm->num_pages, gtt->offset);
-   goto error_gart_bind;
-   }
-
-   list_add_tail(>list, >adev->gtt_list);
-error_gart_bind:
-   spin_unlock(>adev->gtt_list_lock);
-   return r;
-
-}
-
 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
   struct ttm_mem_reg *bo_mem)
 {
struct amdgpu_ttm_tt *gtt = (void*)ttm;
+   uint64_t flags;
int r = 0;
 
if (gtt->userptr) {
@@ -794,9 +770,24 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
bo_mem->mem_type == AMDGPU_PL_OA)
return -EINVAL;
 
-   if (amdgpu_gtt_mgr_is_allocated(bo_mem))
-   r = amdgpu_ttm_do_bind(ttm, bo_mem);
+   if (!amdgpu_gtt_mgr_is_allocated(bo_mem))
+   return 0;
+
+   spin_lock(>adev->gtt_list_lock);
+   flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
+   gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
+   r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
+   ttm->pages, gtt->ttm.dma_address, flags);
+
+   if (r) {
+   DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+ ttm->num_pages, gtt->offset);
+   goto error_gart_bind;
+   }
 
+   list_add_tail(>list, >adev->gtt_list);
+error_gart_bind:
+   spin_unlock(>adev->gtt_list_lock);
return r;
 }
 
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/3] drm/amdgpu: remove the GART copy hack

2017-08-23 Thread He, Roger
Reviewed-by: Roger He 

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Wednesday, August 23, 2017 5:03 PM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 1/3] drm/amdgpu: remove the GART copy hack

From: Christian König 

This isn't used since we don't map evicted BOs to GART any more.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +-
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ab6e03d..7a0656c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1615,7 +1615,6 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
  *
  * @adev: amdgpu_device pointer
  * @exclusive: fence we need to sync to
- * @gtt_flags: flags as they are used for GTT
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @mapping: mapped range and flags to use for the update @@ -1629,7 +1628,6 
@@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
  */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
  struct dma_fence *exclusive,
- uint64_t gtt_flags,
  dma_addr_t *pages_addr,
  struct amdgpu_vm *vm,
  struct amdgpu_bo_va_mapping *mapping, @@ 
-1684,11 +1682,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device 
*adev,
}
 
if (pages_addr) {
-   if (flags == gtt_flags)
-   src = adev->gart.table_addr +
-   (addr >> AMDGPU_GPU_PAGE_SHIFT) * 8;
-   else
-   max_entries = min(max_entries, 16ull * 1024ull);
+   max_entries = min(max_entries, 16ull * 1024ull);
addr = 0;
} else if (flags & AMDGPU_PTE_VALID) {
addr += adev->vm_manager.vram_base_offset;
@@ -1733,10 +1727,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo_va_mapping *mapping;
dma_addr_t *pages_addr = NULL;
-   uint64_t gtt_flags, flags;
struct ttm_mem_reg *mem;
struct drm_mm_node *nodes;
struct dma_fence *exclusive;
+   uint64_t flags;
int r;
 
if (clear || !bo_va->base.bo) {
@@ -1756,15 +1750,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
exclusive = reservation_object_get_excl(bo->tbo.resv);
}
 
-   if (bo) {
+   if (bo)
flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
-   gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) &&
-   adev == amdgpu_ttm_adev(bo->tbo.bdev)) ?
-   flags : 0;
-   } else {
+   else
flags = 0x0;
-   gtt_flags = ~0x0;
-   }
 
spin_lock(>status_lock);
if (!list_empty(_va->base.vm_status))
@@ -1772,8 +1761,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
spin_unlock(>status_lock);
 
list_for_each_entry(mapping, _va->invalids, list) {
-   r = amdgpu_vm_bo_split_mapping(adev, exclusive,
-  gtt_flags, pages_addr, vm,
+   r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
   mapping, flags, nodes,
   _va->last_pt_update);
if (r)
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amd/amdgpu: expose fragment size as module parameter

2017-08-15 Thread He, Roger
-Original Message-
From: Koenig, Christian 
Sent: Tuesday, August 15, 2017 4:48 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amd/amdgpu: expose fragment size as module 
parameter

Am 15.08.2017 um 10:36 schrieb Roger He:
> Change-Id: I70e4ea94b8520e19cfee5ba6c9a0ecf1ee3f5f1a
> Signed-off-by: Roger He <hongbo...@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 4 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +--
>   drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c  | 1 -
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  | 1 -
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  | 1 -
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 2 +-
>   8 files changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d2aaad7..957bd2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -97,6 +97,7 @@ extern int amdgpu_bapm;
>   extern int amdgpu_deep_color;
>   extern int amdgpu_vm_size;
>   extern int amdgpu_vm_block_size;
> +extern int amdgpu_vm_fragment_size;
>   extern int amdgpu_vm_fault_stop;
>   extern int amdgpu_vm_debug;
>   extern int amdgpu_vm_update_mode;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index dd1dc87..44c66a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1077,6 +1077,14 @@ static void amdgpu_check_arguments(struct 
> amdgpu_device *adev)
>   amdgpu_gtt_size = -1;
>   }
>   
> + /* make sense only for GFX8 and previous ASICs
> +  * valid rang is between 4 and 9 inclusive
> +  */
> + if (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4) {
> + dev_warn(adev->dev, "valid rang is between 4 and 9\n");
> + amdgpu_vm_fragment_size = 4;
> + }
> +
>   amdgpu_check_vm_size(adev);
>   
>   amdgpu_check_block_size(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 2cdf844..d9522a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -94,6 +94,7 @@ unsigned amdgpu_ip_block_mask = 0x;
>   int amdgpu_bapm = -1;
>   int amdgpu_deep_color = 0;
>   int amdgpu_vm_size = -1;
> +int amdgpu_vm_fragment_size = 4;
>   int amdgpu_vm_block_size = -1;
>   int amdgpu_vm_fault_stop = 0;
>   int amdgpu_vm_debug = 0;
> @@ -184,6 +185,9 @@ module_param_named(deep_color, amdgpu_deep_color, int, 
> 0444);
>   MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 
> 64GB)");
>   module_param_named(vm_size, amdgpu_vm_size, int, 0444);
>   
> +MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, 
> +etc. 4 = 64K (default), Max 9 = 2M)"); 
> +module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 
> +0444);
> +
>   MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default 
> depending on vm_size)");
>   module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 4ad04cd..85ef4d5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2437,8 +2437,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, 
> uint64_t vm_size)
>   else
>   adev->vm_manager.block_size = amdgpu_vm_block_size;
>   
> - DRM_INFO("vm size is %llu GB, block size is %u-bit\n",
> - adev->vm_manager.vm_size, adev->vm_manager.block_size);
> + adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
> +
> + DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is 
> %u-bit\n",
> + adev->vm_manager.vm_size, adev->vm_manager.block_size,
> + adev->vm_manager.fragment_size);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index dcb053f..56218ac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -815,7 +815,6 @@ static int gmc_v6_0_sw_init(void *handle)
>   return r;
>   
>   amdgpu_vm_adjust_size(adev, 64);
> - adev->vm_manager.fragment_size = 4;
>   adev->v

RE: [PATCH] drm/amdgpu: fix amdgpu_bo_gpu_accessible()

2017-07-13 Thread He, Roger
Reviewed-and-Tested-by: Roger He 

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Christian König
Sent: Thursday, July 13, 2017 6:23 PM
To: amd-gfx@lists.freedesktop.org
Cc: Huang, Ray 
Subject: [PATCH] drm/amdgpu: fix amdgpu_bo_gpu_accessible()

From: Christian König 

The test was relaxed a bit to much.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 3824851..833b172 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -120,7 +120,11 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo 
*bo)
  */
 static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)  {
-   return bo->tbo.mem.mem_type != TTM_PL_SYSTEM;
+   switch (bo->tbo.mem.mem_type) {
+   case TTM_PL_TT: return amdgpu_ttm_is_bound(bo->tbo.ttm);
+   case TTM_PL_VRAM: return true;
+   default: return false;
+   }
 }
 
 int amdgpu_bo_create(struct amdgpu_device *adev,
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/3] drm/amdgpu: add gtt_sys_limit

2017-06-27 Thread He, Roger
Yeah, recently we indeed encountered an issue as below:
On workstation machine it has 32G system memory and thus the GTT table size 
takes up about 192M visible VRAM.
Maybe if system has bigger memory, GTT table size will exceed 256M, so 
Limitation  for max GTT table size is very necessary.

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Felix 
Kuehling
Sent: Wednesday, June 28, 2017 3:25 AM
To: Christian König ; Michel Dänzer 
; John Brooks 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/3] drm/amdgpu: add gtt_sys_limit


On 17-06-27 04:01 AM, Christian König wrote:
> Am 27.06.2017 um 04:57 schrieb Michel Dänzer:
>> On 27/06/17 08:39 AM, John Brooks wrote:
>>> On Mon, Jun 26, 2017 at 03:39:57PM +0200, Christian König wrote:
 From: Christian König 

 Limit the size of the GART table for the system domain.

 This saves us a bunch of visible VRAM, but also limitates the 
 maximum BO size we can swap out.

 Signed-off-by: Christian König 
>>> Hmm.
>>>
>>> On my system, GTT is 4096MiB (1048576 pages). For this, the table 
>>> takes up
>>> 1048576*8 bytes = 8MiB. Reducing GTT to 256MiB (65536 pages) would 
>>> reduce the size of the table to 512 KiB. A relatively large saving, 
>>> to be sure.
>>> But in the
>>> grander scheme of things, is saving 7.5MiB (3% of visible VRAM @
>>> 256M) worth
>>> cutting GTT memory by a factor of 16?
>
> The amount of GTT memory the application can use through the VM page 
> tables stays the same.
>
> Only the system VM is limited to 256MB and so saves us a whole bunch 
> of space.
>
>> I'm afraid not, especially since it would limit the maximum BO size 
>> to < 256MB, if I understand correctly. Pretty sure that would cause 
>> failures with real world apps.
> Yes, exactly that's the major problem here. I should have put a WIP 
> mark on the patch.

OK, I should adapt this for the KFD branch. Currently we make our GART much 
bigger. On a system with lots of system memory, we can use up half the visible 
VRAM for the GART. With something like this we can get it back to something 
reasonable. But a 256MB limit on single allocation size is definitely too small.

Also, if this breaks S3, we have to make sure the hybrid branches don't pick it 
up accidentally.

Regards,
  Felix

>
> Christian.

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/3] drm/amdgpu: add gtt_sys_limit

2017-06-27 Thread He, Roger
Setting limitation for max GTT table size is necessary.
But my only concern is that is it enough for amdgpu_gart_sys_limit 256M. 
whether it will trigger GTT eviction from GTT to CPU domain easily?
Because GTT size always equals to VRAM size before it scales with system memory 
size.

Thanks
Roger(Hongbo.He)
-Original Message-
From: He, Roger 
Sent: Wednesday, June 28, 2017 9:58 AM
To: 'Felix Kuehling' <felix.kuehl...@amd.com>; Christian König 
<deathsim...@vodafone.de>; Michel Dänzer <mic...@daenzer.net>; John Brooks 
<j...@fastquake.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 3/3] drm/amdgpu: add gtt_sys_limit

Yeah, recently we indeed encountered an issue as below:
On workstation machine it has 32G system memory and thus the GTT table size 
takes up about 192M visible VRAM.
Maybe if system has bigger memory, GTT table size will exceed 256M, so 
Limitation  for max GTT table size is very necessary.

Thanks
Roger(Hongbo.He)
-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Felix 
Kuehling
Sent: Wednesday, June 28, 2017 3:25 AM
To: Christian König <deathsim...@vodafone.de>; Michel Dänzer 
<mic...@daenzer.net>; John Brooks <j...@fastquake.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/3] drm/amdgpu: add gtt_sys_limit


On 17-06-27 04:01 AM, Christian König wrote:
> Am 27.06.2017 um 04:57 schrieb Michel Dänzer:
>> On 27/06/17 08:39 AM, John Brooks wrote:
>>> On Mon, Jun 26, 2017 at 03:39:57PM +0200, Christian König wrote:
>>>> From: Christian König <christian.koe...@amd.com>
>>>>
>>>> Limit the size of the GART table for the system domain.
>>>>
>>>> This saves us a bunch of visible VRAM, but also limitates the 
>>>> maximum BO size we can swap out.
>>>>
>>>> Signed-off-by: Christian König <christian.koe...@amd.com>
>>> Hmm.
>>>
>>> On my system, GTT is 4096MiB (1048576 pages). For this, the table 
>>> takes up
>>> 1048576*8 bytes = 8MiB. Reducing GTT to 256MiB (65536 pages) would 
>>> reduce the size of the table to 512 KiB. A relatively large saving, 
>>> to be sure.
>>> But in the
>>> grander scheme of things, is saving 7.5MiB (3% of visible VRAM @
>>> 256M) worth
>>> cutting GTT memory by a factor of 16?
>
> The amount of GTT memory the application can use through the VM page 
> tables stays the same.
>
> Only the system VM is limited to 256MB and so saves us a whole bunch 
> of space.
>
>> I'm afraid not, especially since it would limit the maximum BO size 
>> to < 256MB, if I understand correctly. Pretty sure that would cause 
>> failures with real world apps.
> Yes, exactly that's the major problem here. I should have put a WIP 
> mark on the patch.

OK, I should adapt this for the KFD branch. Currently we make our GART much 
bigger. On a system with lots of system memory, we can use up half the visible 
VRAM for the GART. With something like this we can get it back to something 
reasonable. But a 256MB limit on single allocation size is definitely too small.

Also, if this breaks S3, we have to make sure the hybrid branches don't pick it 
up accidentally.

Regards,
  Felix

>
> Christian.

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: upsteam find bo api

2017-06-18 Thread He, Roger
Patches Look good to me in itself. Acked-by: Roger.He 
<hongbo...@amd.com<mailto:hongbo...@amd.com>>
But of course  you should double confirm if there is Christian’s concern.

Thanks
Roger(Hongbo.He)
From: Zhou, David(ChunMing)
Sent: Friday, June 16, 2017 7:18 PM
To: He, Roger <hongbo...@amd.com>; amd-gfx@lists.freedesktop.org
Subject: 回复: upsteam find bo api

find bo API is used by vulkan, which is being in hybrid only previous, but 
now,they want to upstream.

Sent from my Huawei Mobile

 原始邮件 
主题:RE: upsteam find bo api
发件人:"He, Roger"
收件人:"Zhou, David(ChunMing)" ,amd-gfx@lists.freedesktop.org
抄送:
What is the background or what is it for?

Thanks
Roger(Hongbo.He)
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
zhoucm1
Sent: Friday, June 16, 2017 4:09 PM
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Fwd: upsteam find bo api

ping...?


 Forwarded Message 
Subject:

upsteam find bo api

Date:

Wed, 14 Jun 2017 18:16:24 +0800

From:

zhoucm1 <david1.z...@amd.com><mailto:david1.z...@amd.com>

To:

amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>



Hi all,



Since patches are one feature, and contain kernel and libdrm, I attached

them not by send-mail. Hope not inconvenience.



0001-drm-amdgpu-return-bo-itself-if-userptr-is-cpu-addr-o.patch is

kernel patch.

Other three is libdrm patches including unit test.



please review.



Regards,

David Zhou




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: upsteam find bo api

2017-06-16 Thread He, Roger
What is the background or what is it for?

Thanks
Roger(Hongbo.He)
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
zhoucm1
Sent: Friday, June 16, 2017 4:09 PM
To: amd-gfx@lists.freedesktop.org
Subject: Fwd: upsteam find bo api

ping...?


 Forwarded Message 
Subject:

upsteam find bo api

Date:

Wed, 14 Jun 2017 18:16:24 +0800

From:

zhoucm1 

To:

amd-gfx@lists.freedesktop.org



Hi all,



Since patches are one feature, and contain kernel and libdrm, I attached

them not by send-mail. Hope not inconvenience.



0001-drm-amdgpu-return-bo-itself-if-userptr-is-cpu-addr-o.patch is

kernel patch.

Other three is libdrm patches including unit test.



please review.



Regards,

David Zhou




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: extend lock range for race condition when gpu reset

2017-06-16 Thread He, Roger
Hi Xiangliang:
  Could you try to replace spinlock with mutex?

Thanks
Roger(Hongbo.He)
-Original Message-
From: Yu, Xiangliang 
Sent: Friday, June 16, 2017 2:30 PM
To: Christian König <deathsim...@vodafone.de>; Zhou, David(ChunMing) 
<david1.z...@amd.com>; He, Roger <hongbo...@amd.com>; 
amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: extend lock range for race condition when gpu 
reset

Hi Hongbo,

I got soft lockup message as below when running valkan test with two VFs, and 
work fine if reverting this patch. Could you help check it when you feel free?

[ 1920.163455] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 23s! 
[deqp-vk:2175] [ 1920.163459] Modules linked in: amdkfd amd_iommu_v2 amdgpu(OE) 
ttm drm_kms_helper drm i2c_algo_bit fb_sys_fops syscopyarea sysfillrect 
sysimgblt snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core 
snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq 
crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_seq_device snd_timer 
aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd serio_raw 
snd mac_hid soundcore i2c_piix4 binfmt_misc parport_pc ppdev sunrpc lp parport 
autofs4 psmouse pata_acpi floppy
[ 1920.163482] CPU: 1 PID: 2175 Comm: deqp-vk Tainted: G   OE   
4.9.0-custom #1
[ 1920.163483] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 1920.163484] task: 880139c63a80 
task.stack: c9000313c000 [ 1920.163485] RIP: 0010:[]  
[] native_queued_spin_lock_slowpath+0x15/0x1a0
[ 1920.163491] RSP: 0018:c9000313f6d8  EFLAGS: 0202 [ 1920.163492] RAX: 
0001 RBX: 88012f054d80 RCX:  [ 1920.163492] 
RDX: 0001 RSI: 0001 RDI: 8801334899a8 [ 
1920.163493] RBP: c9000313f6d8 R08:  R09: 0128 
[ 1920.163493] R10: 01b9269bb35e R11: 0001 R12: 
88012f054e28 [ 1920.163494] R13: 88012f054d80 R14: 8800369ecc00 
R15: 880133482778 [ 1920.163495] FS:  7fc0cf438740() 
GS:88013fc8() knlGS: [ 1920.163496] CS:  0010 DS: 
 ES:  CR0: 80050033 [ 1920.163496] CR2: 7fc0c5100fa0 CR3: 
00013870d000 CR4: 001406e0 [ 1920.163500] Stack:
[ 1920.163500]  c9000313f6e8 811816ef c9000313f6f8 
817e1a10 [ 1920.163502]  c9000313f738 a03242c0 
8800369ecc00 0400 [ 1920.163503]  0001 
88012f054d80 0002 88012e831380 [ 1920.163505] Call Trace:
[ 1920.163509]  [] queued_spin_lock_slowpath+0xb/0xf [ 
1920.163513]  [] _raw_spin_lock+0x20/0x30 [ 1920.163562]  
[] amdgpu_ttm_backend_unbind+0x50/0x1e0 [amdgpu] [ 
1920.163567]  [] ttm_tt_unbind+0x1e/0x30 [ttm] [ 1920.163570] 
 [] ttm_tt_destroy+0x17/0x60 [ttm] [ 1920.163572]  
[] ttm_bo_cleanup_memtype_use+0x30/0x70 [ttm] [ 1920.163575]  
[] ttm_bo_release+0x1ca/0x2a0 [ttm] [ 1920.163578]  
[] ttm_bo_unref+0x24/0x30 [ttm] [ 1920.163580]  
[] ttm_bo_pipeline_move+0x2a7/0x3a0 [ttm] [ 1920.163599]  
[] amdgpu_move_blit+0x1bc/0x260 [amdgpu] [ 1920.163617]  
[] amdgpu_bo_move+0xb9/0x230 [amdgpu] [ 1920.163620]  
[] ttm_bo_handle_move_mem+0x268/0x590 [ttm] [ 1920.163623]  
[] ? ttm_bo_mem_space+0x38d/0x440 [ttm] [ 1920.163625]  
[] ? __save_stack_trace+0x73/0xd0 [ 1920.163628]  
[] ttm_bo_validate+0x114/0x130 [ttm] [ 1920.163657]  
[] ? amdgpu_cs_bo_validate.isra.5+0xb0/0xb0 [amdgpu] [ 
1920.163675]  [] amdgpu_cs_bo_validate.isra.5+0x75/0xb0 
[amdgpu] [ 1920.163693]  [] ? 
amdgpu_cs_bo_validate.isra.5+0xb0/0xb0 [amdgpu] [ 1920.163710]  
[] amdgpu_cs_validate+0x49/0x1b0 [amdgpu] [ 1920.163727]  
[] ? amdgpu_cs_bo_validate.isra.5+0xb0/0xb0 [amdgpu] [ 
1920.163743]  [] ? amdgpu_cs_bo_validate.isra.5+0xb0/0xb0 
[amdgpu] [ 1920.163762]  [] 
amdgpu_vm_validate_level.isra.9+0x4f/0x90 [amdgpu] [ 1920.163791]  
[] ? amdgpu_cs_bo_validate.isra.5+0xb0/0xb0 [amdgpu] [ 
1920.163807]  [] amdgpu_vm_validate_level.isra.9+0x66/0x90 
[amdgpu] [ 1920.163823]  [] ? 
amdgpu_cs_bo_validate.isra.5+0xb0/0xb0 [amdgpu] [ 1920.163840]  
[] amdgpu_vm_validate_level.isra.9+0x66/0x90 [amdgpu] [ 
1920.163856]  [] amdgpu_vm_validate_pt_bos+0x26/0x30 [amdgpu] 
[ 1920.163872]  [] amdgpu_cs_ioctl+0xca8/0x1490 [amdgpu] [ 
1920.163889]  [] drm_ioctl+0x32c/0x440 [drm] [ 1920.163904]  
[] ? amdgpu_cs_find_mapping+0xb0/0xb0 [amdgpu] [ 1920.163906] 
 [] ? mem_cgroup_commit_charge+0x76/0xe0
[ 1920.163908]  [] ? page_add_new_anon_rmap+0x89/0xc0 [ 
1920.163910]  [] ? 
lru_cache_add_active_or_unevictable+0x39/0xc0
[ 1920.163925]  [] amdgpu_drm_ioctl+0x4c/0x80 [amdgpu] [ 
1920.163927]  [] do_vfs_ioctl+0x96/0x5b0 [ 1920.163929]  
[] ? __do_page_fault+0x267/0x4d0 [ 1920.163930]  
[] SyS_ioctl+0x79/0x90 [ 1920.163932]  [] 
do_syscall_64+0x6e/0x180 [ 1920.163933]  [] 
entry_SYSCALL64_slow_path+0x25/0x25

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf O