Re: [PATCH 2/2] drm/amdgpu: Optimize mutex usage (v2)

2017-06-17 Thread Christian König

Am 17.06.2017 um 00:00 schrieb axie:



On 2017-06-16 01:48 PM, Christian König wrote:

Am 16.06.2017 um 07:03 schrieb Alex Xie:

Use rw_semaphore instead of mutex for bo_lists.

In the original function amdgpu_bo_list_get, the wait
for result->lock can be quite long while the mutex
bo_list_lock is held. It can make other tasks
wait on bo_list_lock for a long period too.
Changing bo_list_lock to an rw_semaphore can avoid most of
such long waiting.

Secondly, this patch allows several tasks (readers of idr)
to proceed at the same time.

v2: use rcu and kref (Dave Airlie and Christian König)

Signed-off-by: Alex Xie 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 40 
-

  2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 063fc73..e9b3981 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -871,6 +871,8 @@ struct amdgpu_fpriv {
struct amdgpu_bo_list {
  struct mutex lock;
+struct rcu_head rhead;
+struct kref refcount;
  struct amdgpu_bo *gds_obj;
  struct amdgpu_bo *gws_obj;
  struct amdgpu_bo *oa_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c

index 5af956f..efa6903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -41,6 +41,20 @@ static int amdgpu_bo_list_set(struct 
amdgpu_device *adev,

   struct drm_amdgpu_bo_list_entry *info,
   unsigned num_entries);
  +static void amdgpu_bo_list_release_rcu(struct kref *ref)
+{
+unsigned i;
+struct amdgpu_bo_list *list = container_of(ref, struct 
amdgpu_bo_list,

+   refcount);
+
+for (i = 0; i < list->num_entries; ++i)
+amdgpu_bo_unref(&list->array[i].robj);
+
+mutex_destroy(&list->lock);
+drm_free_large(list->array);
+kfree_rcu(list, rhead);
+}
+


I'm probably missing something here: Why a new function and not 
change the existing amdgpu_bo_list_free() to use kfree_rcu instead?


Apart from that the patch looks good to me,
Christian.


amdgpu_bo_list_free() is called by amdgpu_driver_postclose_kms in file 
amdgpu_kms.c too.
For amdgpu_driver_postclose_kms, there is no need to call kfree_rcu. 
It is more suitable to call kfree.


So we need two different release/free functions. Then I created a new 
function for kref_put.


Ah, I see. Well call from amdgpu_driver_postclose_kms() is only used 
when the application crashes and doesn't clean up properly after itself.


Not sure if it's a good idea to duplicate the code because of this. 
Waiting for an RCU grace period is a no-op in most cases (it's very 
likely that all other CPUs are doing something in userspace), so 
kfree_rcu is rather cheap.


Either way the patch is Reviewed-by: Christian König 
.







  static int amdgpu_bo_list_create(struct amdgpu_device *adev,
   struct drm_file *filp,
   struct drm_amdgpu_bo_list_entry *info,
@@ -57,7 +71,7 @@ static int amdgpu_bo_list_create(struct 
amdgpu_device *adev,

/* initialize bo list*/
  mutex_init(&list->lock);
-
+kref_init(&list->refcount);
  r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
  if (r) {
  kfree(list);
@@ -83,14 +97,9 @@ static void amdgpu_bo_list_destroy(struct 
amdgpu_fpriv *fpriv, int id)

mutex_lock(&fpriv->bo_list_lock);
  list = idr_remove(&fpriv->bo_list_handles, id);
-if (list) {
-/* Another user may have a reference to this list still */
-mutex_lock(&list->lock);
-mutex_unlock(&list->lock);
-amdgpu_bo_list_free(list);
-}
-
  mutex_unlock(&fpriv->bo_list_lock);
+if (list)
+kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
  }
static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@@ -185,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, 
int id)

  {
  struct amdgpu_bo_list *result;
  -mutex_lock(&fpriv->bo_list_lock);
+rcu_read_lock();
  result = idr_find(&fpriv->bo_list_handles, id);
-if (result)
-mutex_lock(&result->lock);
-mutex_unlock(&fpriv->bo_list_lock);
+
+if (result) {
+if (kref_get_unless_zero(&result->refcount))
+mutex_lock(&result->lock);
+else
+result = NULL;
+}
+rcu_read_unlock();
+
  return result;
  }
  @@ -227,6 +242,7 @@ void amdgpu_bo_list_get_list(struct 
amdgpu_bo_list *list,

  void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
  {
  mutex_unlock(&list->lock);
+kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
  }
void amdgpu_bo_list_free(struct amdgpu_bo_list *list)





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




Re: [PATCH 2/2] drm/amdgpu: Optimize mutex usage (v2)

2017-06-16 Thread axie



On 2017-06-16 01:48 PM, Christian König wrote:

Am 16.06.2017 um 07:03 schrieb Alex Xie:

Use rw_semaphore instead of mutex for bo_lists.

In the original function amdgpu_bo_list_get, the wait
for result->lock can be quite long while the mutex
bo_list_lock is held. It can make other tasks
wait on bo_list_lock for a long period too.
Changing bo_list_lock to an rw_semaphore can avoid most of
such long waiting.

Secondly, this patch allows several tasks (readers of idr)
to proceed at the same time.

v2: use rcu and kref (Dave Airlie and Christian König)

Signed-off-by: Alex Xie 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 40 
-

  2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 063fc73..e9b3981 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -871,6 +871,8 @@ struct amdgpu_fpriv {
struct amdgpu_bo_list {
  struct mutex lock;
+struct rcu_head rhead;
+struct kref refcount;
  struct amdgpu_bo *gds_obj;
  struct amdgpu_bo *gws_obj;
  struct amdgpu_bo *oa_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c

index 5af956f..efa6903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -41,6 +41,20 @@ static int amdgpu_bo_list_set(struct amdgpu_device 
*adev,

   struct drm_amdgpu_bo_list_entry *info,
   unsigned num_entries);
  +static void amdgpu_bo_list_release_rcu(struct kref *ref)
+{
+unsigned i;
+struct amdgpu_bo_list *list = container_of(ref, struct 
amdgpu_bo_list,

+   refcount);
+
+for (i = 0; i < list->num_entries; ++i)
+amdgpu_bo_unref(&list->array[i].robj);
+
+mutex_destroy(&list->lock);
+drm_free_large(list->array);
+kfree_rcu(list, rhead);
+}
+


I'm probably missing something here: Why a new function and not change 
the existing amdgpu_bo_list_free() to use kfree_rcu instead?


Apart from that the patch looks good to me,
Christian.


amdgpu_bo_list_free() is called by amdgpu_driver_postclose_kms in file 
amdgpu_kms.c too.
For amdgpu_driver_postclose_kms, there is no need to call kfree_rcu. It 
is more suitable to call kfree.


So we need two different release/free functions. Then I created a new 
function for kref_put.





  static int amdgpu_bo_list_create(struct amdgpu_device *adev,
   struct drm_file *filp,
   struct drm_amdgpu_bo_list_entry *info,
@@ -57,7 +71,7 @@ static int amdgpu_bo_list_create(struct 
amdgpu_device *adev,

/* initialize bo list*/
  mutex_init(&list->lock);
-
+kref_init(&list->refcount);
  r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
  if (r) {
  kfree(list);
@@ -83,14 +97,9 @@ static void amdgpu_bo_list_destroy(struct 
amdgpu_fpriv *fpriv, int id)

mutex_lock(&fpriv->bo_list_lock);
  list = idr_remove(&fpriv->bo_list_handles, id);
-if (list) {
-/* Another user may have a reference to this list still */
-mutex_lock(&list->lock);
-mutex_unlock(&list->lock);
-amdgpu_bo_list_free(list);
-}
-
  mutex_unlock(&fpriv->bo_list_lock);
+if (list)
+kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
  }
static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@@ -185,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, 
int id)

  {
  struct amdgpu_bo_list *result;
  -mutex_lock(&fpriv->bo_list_lock);
+rcu_read_lock();
  result = idr_find(&fpriv->bo_list_handles, id);
-if (result)
-mutex_lock(&result->lock);
-mutex_unlock(&fpriv->bo_list_lock);
+
+if (result) {
+if (kref_get_unless_zero(&result->refcount))
+mutex_lock(&result->lock);
+else
+result = NULL;
+}
+rcu_read_unlock();
+
  return result;
  }
  @@ -227,6 +242,7 @@ void amdgpu_bo_list_get_list(struct 
amdgpu_bo_list *list,

  void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
  {
  mutex_unlock(&list->lock);
+kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
  }
void amdgpu_bo_list_free(struct amdgpu_bo_list *list)





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: Optimize mutex usage (v2)

2017-06-16 Thread Christian König

Am 16.06.2017 um 07:03 schrieb Alex Xie:

Use rw_semaphore instead of mutex for bo_lists.

In the original function amdgpu_bo_list_get, the wait
for result->lock can be quite long while the mutex
bo_list_lock is held. It can make other tasks
wait on bo_list_lock for a long period too.
Changing bo_list_lock to an rw_semaphore can avoid most of
such long waiting.

Secondly, this patch allows several tasks (readers of idr)
to proceed at the same time.

v2: use rcu and kref (Dave Airlie and Christian König)

Signed-off-by: Alex Xie 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 40 -
  2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 063fc73..e9b3981 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -871,6 +871,8 @@ struct amdgpu_fpriv {
  
  struct amdgpu_bo_list {

struct mutex lock;
+   struct rcu_head rhead;
+   struct kref refcount;
struct amdgpu_bo *gds_obj;
struct amdgpu_bo *gws_obj;
struct amdgpu_bo *oa_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 5af956f..efa6903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -41,6 +41,20 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 struct drm_amdgpu_bo_list_entry *info,
 unsigned num_entries);
  
+static void amdgpu_bo_list_release_rcu(struct kref *ref)

+{
+   unsigned i;
+   struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+  refcount);
+
+   for (i = 0; i < list->num_entries; ++i)
+   amdgpu_bo_unref(&list->array[i].robj);
+
+   mutex_destroy(&list->lock);
+   drm_free_large(list->array);
+   kfree_rcu(list, rhead);
+}
+


I'm probably missing something here: Why a new function and not change 
the existing amdgpu_bo_list_free() to use kfree_rcu instead?


Apart from that the patch looks good to me,
Christian.


  static int amdgpu_bo_list_create(struct amdgpu_device *adev,
 struct drm_file *filp,
 struct drm_amdgpu_bo_list_entry *info,
@@ -57,7 +71,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
  
  	/* initialize bo list*/

	mutex_init(&list->lock);
-
+   kref_init(&list->refcount);
r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
if (r) {
kfree(list);
@@ -83,14 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv 
*fpriv, int id)
  
  	mutex_lock(&fpriv->bo_list_lock);

	list = idr_remove(&fpriv->bo_list_handles, id);
-   if (list) {
-   /* Another user may have a reference to this list still */
-   mutex_lock(&list->lock);
-   mutex_unlock(&list->lock);
-   amdgpu_bo_list_free(list);
-   }
-
	mutex_unlock(&fpriv->bo_list_lock);
+   if (list)
+   kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
  }
  
  static int amdgpu_bo_list_set(struct amdgpu_device *adev,

@@ -185,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
  {
struct amdgpu_bo_list *result;
  
-	mutex_lock(&fpriv->bo_list_lock);

+   rcu_read_lock();
	result = idr_find(&fpriv->bo_list_handles, id);
-   if (result)
-   mutex_lock(&result->lock);
-   mutex_unlock(&fpriv->bo_list_lock);
+
+   if (result) {
+   if (kref_get_unless_zero(&result->refcount))
+   mutex_lock(&result->lock);
+   else
+   result = NULL;
+   }
+   rcu_read_unlock();
+
return result;
  }
  
@@ -227,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,

  void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
  {
	mutex_unlock(&list->lock);
+   kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
  }
  
  void amdgpu_bo_list_free(struct amdgpu_bo_list *list)



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: Optimize mutex usage (v2)

2017-06-15 Thread Michel Dänzer
On 16/06/17 02:03 PM, Alex Xie wrote:
> Use rw_semaphore instead of mutex for bo_lists.
> 
> In the original function amdgpu_bo_list_get, the wait
> for result->lock can be quite long while the mutex
> bo_list_lock is held. It can make other tasks
> wait on bo_list_lock for a long period too.
> Changing bo_list_lock to an rw_semaphore can avoid most of
> such long waiting.
> 
> Secondly, this patch allows several tasks (readers of idr)
> to proceed at the same time.
> 
> v2: use rcu and kref (Dave Airlie and Christian König)

At least the first two paragraphs above are inaccurate and confusing in
v2. Please modify the commit log to reflect what the patch actually does
now.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: Optimize mutex usage (v2)

2017-06-15 Thread Alex Xie
Use rw_semaphore instead of mutex for bo_lists.

In the original function amdgpu_bo_list_get, the wait
for result->lock can be quite long while the mutex
bo_list_lock is held. It can make other tasks
wait on bo_list_lock for a long period too.
Changing bo_list_lock to an rw_semaphore can avoid most of
such long waiting.

Secondly, this patch allows several tasks (readers of idr)
to proceed at the same time.

v2: use rcu and kref (Dave Airlie and Christian König)

Signed-off-by: Alex Xie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 40 -
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 063fc73..e9b3981 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -871,6 +871,8 @@ struct amdgpu_fpriv {
 
 struct amdgpu_bo_list {
struct mutex lock;
+   struct rcu_head rhead;
+   struct kref refcount;
struct amdgpu_bo *gds_obj;
struct amdgpu_bo *gws_obj;
struct amdgpu_bo *oa_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 5af956f..efa6903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -41,6 +41,20 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 struct drm_amdgpu_bo_list_entry *info,
 unsigned num_entries);
 
+static void amdgpu_bo_list_release_rcu(struct kref *ref)
+{
+   unsigned i;
+   struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+  refcount);
+
+   for (i = 0; i < list->num_entries; ++i)
+   amdgpu_bo_unref(&list->array[i].robj);
+
+   mutex_destroy(&list->lock);
+   drm_free_large(list->array);
+   kfree_rcu(list, rhead);
+}
+
 static int amdgpu_bo_list_create(struct amdgpu_device *adev,
 struct drm_file *filp,
 struct drm_amdgpu_bo_list_entry *info,
@@ -57,7 +71,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
 
/* initialize bo list*/
	mutex_init(&list->lock);
-
+   kref_init(&list->refcount);
r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
if (r) {
kfree(list);
@@ -83,14 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv 
*fpriv, int id)
 
	mutex_lock(&fpriv->bo_list_lock);
	list = idr_remove(&fpriv->bo_list_handles, id);
-   if (list) {
-   /* Another user may have a reference to this list still */
-   mutex_lock(&list->lock);
-   mutex_unlock(&list->lock);
-   amdgpu_bo_list_free(list);
-   }
-
	mutex_unlock(&fpriv->bo_list_lock);
+   if (list)
+   kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 
 static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@@ -185,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
 {
struct amdgpu_bo_list *result;
 
-   mutex_lock(&fpriv->bo_list_lock);
+   rcu_read_lock();
	result = idr_find(&fpriv->bo_list_handles, id);
-   if (result)
-   mutex_lock(&result->lock);
-   mutex_unlock(&fpriv->bo_list_lock);
+
+   if (result) {
+   if (kref_get_unless_zero(&result->refcount))
+   mutex_lock(&result->lock);
+   else
+   result = NULL;
+   }
+   rcu_read_unlock();
+
return result;
 }
 
@@ -227,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
 {
	mutex_unlock(&list->lock);
+   kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx