Re: [PATCH 2/2] drm/amdgpu: Optimize mutex usage (v2)
On 17.06.2017 at 00:00, axie wrote:
> On 2017-06-16 01:48 PM, Christian König wrote:
>> On 16.06.2017 at 07:03, Alex Xie wrote:
>>> Use rw_semaphore instead of mutex for bo_lists.
>>>
>>> In the original function amdgpu_bo_list_get, the wait for
>>> result->lock can be quite long while the mutex bo_list_lock is held,
>>> which can make other tasks wait a long time for bo_list_lock too.
>>> Changing bo_list_lock to an rw_semaphore avoids most of that long
>>> waiting.
>>>
>>> Secondly, this patch allows several tasks (readers of the idr) to
>>> proceed at the same time.
>>>
>>> v2: use rcu and kref (Dave Airlie and Christian König)
>>>
>>> Signed-off-by: Alex Xie
>>> ---
>>> [...]
>>> +static void amdgpu_bo_list_release_rcu(struct kref *ref)
>>> +{
>>> +	unsigned i;
>>> +	struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
>>> +						   refcount);
>>> +
>>> +	for (i = 0; i < list->num_entries; ++i)
>>> +		amdgpu_bo_unref(&list->array[i].robj);
>>> +
>>> +	mutex_destroy(&list->lock);
>>> +	drm_free_large(list->array);
>>> +	kfree_rcu(list, rhead);
>>> +}
>>> +
>> I'm probably missing something here: why a new function, and not
>> change the existing amdgpu_bo_list_free() to use kfree_rcu() instead?
>>
>> Apart from that the patch looks good to me,
>> Christian.
>
> amdgpu_bo_list_free() is also called by amdgpu_driver_postclose_kms()
> in amdgpu_kms.c. For amdgpu_driver_postclose_kms() there is no need to
> call kfree_rcu(); plain kfree() is more suitable. So we need two
> different release/free functions, which is why I created a new function
> for kref_put().

Ah, I see. The call from amdgpu_driver_postclose_kms() is only used when
the application crashes and doesn't clean up properly after itself.

I'm not sure it's a good idea to duplicate the code because of that.
Waiting for an RCU grace period is a no-op in most cases (it's very
likely that all other CPUs are doing something in userspace), so
kfree_rcu() is rather cheap.

Either way the patch is Reviewed-by: Christian König.
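To illustrate the cost argument above: kfree_rcu() merely queues the free
and returns immediately, so nothing in the calling path blocks for the
grace period. A minimal sketch of that distinction; the struct and
function names here are hypothetical, not from the patch:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_obj {
	int payload;
	struct rcu_head rhead;	/* storage kfree_rcu() uses to queue the free */
};

static void demo_obj_drop(struct demo_obj *obj)
{
	/*
	 * Queue the kfree() to run after the current RCU grace period
	 * expires; this call returns immediately.  Contrast with
	 * synchronize_rcu(), which would block this task until all
	 * pre-existing RCU readers had finished.
	 */
	kfree_rcu(obj, rhead);
}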
Re: [PATCH 2/2] drm/amdgpu: Optimize mutex usage (v2)
On 2017-06-16 01:48 PM, Christian König wrote:
> On 16.06.2017 at 07:03, Alex Xie wrote:
>> [...]
>> +static void amdgpu_bo_list_release_rcu(struct kref *ref)
>> +{
>> [...]
>> +	kfree_rcu(list, rhead);
>> +}
>> +
> I'm probably missing something here: why a new function, and not change
> the existing amdgpu_bo_list_free() to use kfree_rcu() instead?
>
> Apart from that the patch looks good to me,
> Christian.

amdgpu_bo_list_free() is also called by amdgpu_driver_postclose_kms() in
amdgpu_kms.c. For amdgpu_driver_postclose_kms() there is no need to call
kfree_rcu(); plain kfree() is more suitable. So we need two different
release/free functions, which is why I created a new function for
kref_put().
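A minimal sketch of the two teardown paths being described, with
hypothetical names (the real code lives in amdgpu_bo_list.c): the kref
release callback must defer its free past an RCU grace period because
lock-free readers may still hold the pointer, while a path that runs
after all lookups have become impossible can free immediately.

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_list {
	struct kref refcount;
	struct rcu_head rhead;
};

/* Release callback for kref_put(): RCU readers may still hold the
 * pointer, so defer the actual free past a grace period. */
static void demo_list_release_rcu(struct kref *ref)
{
	struct demo_list *list = container_of(ref, struct demo_list,
					      refcount);

	kfree_rcu(list, rhead);
}

/* Teardown when no lookup can happen any more (e.g. the per-file idr
 * has already been torn down on file close): plain kfree() suffices. */
static void demo_list_free(struct demo_list *list)
{
	kfree(list);
}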
Re: [PATCH 2/2] drm/amdgpu: Optimize mutex usage (v2)
On 16.06.2017 at 07:03, Alex Xie wrote:
> Use rw_semaphore instead of mutex for bo_lists.
>
> In the original function amdgpu_bo_list_get, the wait for result->lock
> can be quite long while the mutex bo_list_lock is held, which can make
> other tasks wait a long time for bo_list_lock too. Changing
> bo_list_lock to an rw_semaphore avoids most of that long waiting.
>
> Secondly, this patch allows several tasks (readers of the idr) to
> proceed at the same time.
>
> v2: use rcu and kref (Dave Airlie and Christian König)
>
> Signed-off-by: Alex Xie
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 40 ++++++++++++++++++++--------
>  2 files changed, 30 insertions(+), 12 deletions(-)
>
> [...]
> @@ -41,6 +41,20 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
> [...]
> +static void amdgpu_bo_list_release_rcu(struct kref *ref)
> +{
> +	unsigned i;
> +	struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
> +						   refcount);
> +
> +	for (i = 0; i < list->num_entries; ++i)
> +		amdgpu_bo_unref(&list->array[i].robj);
> +
> +	mutex_destroy(&list->lock);
> +	drm_free_large(list->array);
> +	kfree_rcu(list, rhead);
> +}
> +

I'm probably missing something here: why a new function, and not change
the existing amdgpu_bo_list_free() to use kfree_rcu() instead?

Apart from that the patch looks good to me,
Christian.
Re: [PATCH 2/2] drm/amdgpu: Optimize mutex usage (v2)
On 16/06/17 02:03 PM, Alex Xie wrote:
> Use rw_semaphore instead of mutex for bo_lists.
>
> In the original function amdgpu_bo_list_get, the wait for result->lock
> can be quite long while the mutex bo_list_lock is held, which can make
> other tasks wait a long time for bo_list_lock too. Changing
> bo_list_lock to an rw_semaphore avoids most of that long waiting.
>
> Secondly, this patch allows several tasks (readers of the idr) to
> proceed at the same time.
>
> v2: use rcu and kref (Dave Airlie and Christian König)

At least the first two paragraphs above are inaccurate and confusing in
v2. Please modify the commit log to reflect what the patch actually does
now.

-- 
Earthling Michel Dänzer            |              http://www.amd.com
Libre software enthusiast          |            Mesa and X developer
[PATCH 2/2] drm/amdgpu: Optimize mutex usage (v2)
Use rw_semaphore instead of mutex for bo_lists.

In the original function amdgpu_bo_list_get, the wait for result->lock
can be quite long while the mutex bo_list_lock is held, which can make
other tasks wait a long time for bo_list_lock too. Changing
bo_list_lock to an rw_semaphore avoids most of that long waiting.

Secondly, this patch allows several tasks (readers of the idr) to
proceed at the same time.

v2: use rcu and kref (Dave Airlie and Christian König)

Signed-off-by: Alex Xie
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 40 ++++++++++++++++++++--------
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 063fc73..e9b3981 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -871,6 +871,8 @@ struct amdgpu_fpriv {
 
 struct amdgpu_bo_list {
 	struct mutex lock;
+	struct rcu_head rhead;
+	struct kref refcount;
 	struct amdgpu_bo *gds_obj;
 	struct amdgpu_bo *gws_obj;
 	struct amdgpu_bo *oa_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 5af956f..efa6903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -41,6 +41,20 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 				     struct drm_amdgpu_bo_list_entry *info,
 				     unsigned num_entries);
 
+static void amdgpu_bo_list_release_rcu(struct kref *ref)
+{
+	unsigned i;
+	struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+						   refcount);
+
+	for (i = 0; i < list->num_entries; ++i)
+		amdgpu_bo_unref(&list->array[i].robj);
+
+	mutex_destroy(&list->lock);
+	drm_free_large(list->array);
+	kfree_rcu(list, rhead);
+}
+
 static int amdgpu_bo_list_create(struct amdgpu_device *adev,
 				 struct drm_file *filp,
 				 struct drm_amdgpu_bo_list_entry *info,
@@ -57,7 +71,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
 
 	/* initialize bo list*/
 	mutex_init(&list->lock);
-
+	kref_init(&list->refcount);
 	r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
 	if (r) {
 		kfree(list);
@@ -83,14 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
 
 	mutex_lock(&fpriv->bo_list_lock);
 	list = idr_remove(&fpriv->bo_list_handles, id);
-	if (list) {
-		/* Another user may have a reference to this list still */
-		mutex_lock(&list->lock);
-		mutex_unlock(&list->lock);
-		amdgpu_bo_list_free(list);
-	}
-
 	mutex_unlock(&fpriv->bo_list_lock);
+	if (list)
+		kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 
 static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@@ -185,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
 {
 	struct amdgpu_bo_list *result;
 
-	mutex_lock(&fpriv->bo_list_lock);
+	rcu_read_lock();
 	result = idr_find(&fpriv->bo_list_handles, id);
-	if (result)
-		mutex_lock(&result->lock);
-	mutex_unlock(&fpriv->bo_list_lock);
+
+	if (result) {
+		if (kref_get_unless_zero(&result->refcount))
+			mutex_lock(&result->lock);
+		else
+			result = NULL;
+	}
+	rcu_read_unlock();
+
 	return result;
 }
 
@@ -227,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
 {
 	mutex_unlock(&list->lock);
+	kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
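For readers following the v2 design: the key pattern in the patch is a
lock-free lookup in which idr_find() runs under rcu_read_lock() and
kref_get_unless_zero() refuses to revive an object whose last reference
has already been dropped by a concurrent amdgpu_bo_list_destroy(). Below
is a minimal, self-contained sketch of the same pattern; everything
except the kernel APIs (idr_find, kref_get_unless_zero, kfree_rcu,
rcu_read_lock/unlock) is an illustrative name, not amdgpu code:

#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_list {
	struct kref refcount;
	struct rcu_head rhead;
};

static void demo_list_release_rcu(struct kref *ref)
{
	struct demo_list *list = container_of(ref, struct demo_list,
					      refcount);

	/* Free only after all current RCU readers are done. */
	kfree_rcu(list, rhead);
}

static struct demo_list *demo_list_get(struct idr *handles, int id)
{
	struct demo_list *result;

	/*
	 * rcu_read_lock() guarantees a concurrent idr_remove() +
	 * kref_put() cannot free the object out from under us, because
	 * the release callback defers the free with kfree_rcu().
	 * kref_get_unless_zero() closes the remaining window: if the
	 * refcount already hit zero, the lookup fails cleanly instead of
	 * resurrecting a dying object.
	 */
	rcu_read_lock();
	result = idr_find(handles, id);
	if (result && !kref_get_unless_zero(&result->refcount))
		result = NULL;
	rcu_read_unlock();

	return result;
}

A caller would later drop its reference with
kref_put(&result->refcount, demo_list_release_rcu), mirroring what the
patch does in amdgpu_bo_list_put().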