Re: [PATCH] drm/amdgpu: optimize out a spin lock (v2)

2017-06-21 Thread Michel Dänzer
On 22/06/17 11:45 AM, Alex Xie wrote:
> Use atomic instead of spin lock.
> v2: Adjust commit message
> 
> Signed-off-by: Alex Xie 

The shortlog should be more specific, e.g. something like "drm/amdgpu:
Drop spinlock from mm_stats".

It's important for the Git commit shortlog to be as specific as possible
because in many cases only the shortlogs of commits are visible.


I'll leave it to others to judge whether the conversion from spinlock to
atomics is safe / an overall win.


>   /* This returns 0 if the driver is in debt to disallow (optional)
>* buffer moves.
>*/
> - max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
> -
> - spin_unlock(&adev->mm_stats.lock);
> + max_bytes = us_to_bytes(adev, accum_us);
>   return max_bytes;
>  }

You can just make this

return us_to_bytes(adev, accum_us);

and remove the max_bytes local.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: optimize out a spin lock (v2)

2017-06-21 Thread Alex Xie
Use atomic instead of spin lock.
v2: Adjust commit message

Signed-off-by: Alex Xie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 110 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   1 -
 3 files changed, 76 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7caf514..21d318b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1588,9 +1588,8 @@ struct amdgpu_device {
 
/* data for buffer migration throttling */
struct {
-   spinlock_t  lock;
-   s64 last_update_us;
-   s64 accum_us; /* accumulated microseconds */
+   atomic64_t  last_update_us;
+   atomic64_t  accum_us; /* accumulated microseconds */
u32 log2_max_MBps;
} mm_stats;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82131d7..7b6f42e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -225,6 +225,9 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct 
amdgpu_device *adev)
s64 time_us, increment_us;
u64 max_bytes;
u64 free_vram, total_vram, used_vram;
+   s64 old_update_us, head_time_us;
+   s64 accum_us;
+   s64 old_accum_us, head_accum_us;
 
/* Allow a maximum of 200 accumulated ms. This is basically per-IB
 * throttling.
@@ -242,47 +245,83 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct 
amdgpu_device *adev)
used_vram = atomic64_read(&adev->vram_usage);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
-   spin_lock(&adev->mm_stats.lock);
-
/* Increase the amount of accumulated us. */
-   time_us = ktime_to_us(ktime_get());
-   increment_us = time_us - adev->mm_stats.last_update_us;
-   adev->mm_stats.last_update_us = time_us;
-   adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
-  us_upper_bound);
-
-   /* This prevents the short period of low performance when the VRAM
-* usage is low and the driver is in debt or doesn't have enough
-* accumulated us to fill VRAM quickly.
-*
-* The situation can occur in these cases:
-* - a lot of VRAM is freed by userspace
-* - the presence of a big buffer causes a lot of evictions
-*   (solution: split buffers into smaller ones)
-*
-* If 128 MB or 1/8th of VRAM is free, start filling it now by setting
-* accum_us to a positive number.
-*/
-   if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
-   s64 min_us;
-
-   /* Be more aggresive on dGPUs. Try to fill a portion of free
-* VRAM now.
-*/
-   if (!(adev->flags & AMD_IS_APU))
-   min_us = bytes_to_us(adev, free_vram / 4);
+   old_update_us = atomic64_read(&adev->mm_stats.last_update_us);
+   for (;;) {
+   time_us = ktime_to_us(ktime_get());
+   head_time_us = atomic64_cmpxchg(&adev->mm_stats.last_update_us,
+   old_update_us, time_us);
+
+   if (likely(head_time_us == old_update_us))
+   /*
+* No other task modified adev->mm_stats.last_update_us.
+* Update was successful.
+*/
+   break;
else
-   min_us = 0; /* Reset accum_us on APUs. */
+   /* Another task modified the value after we read it.
+* A rare contention happens, let us retry.
+* In most case, one retry can do the job.
+* See function atomic64_add_unless as a similar idea.
+*/
+   old_update_us = head_time_us;
+   }
+   increment_us = time_us - old_update_us;
+
+   old_accum_us = atomic64_read(&adev->mm_stats.accum_us);
+
+   for (;;) {
+   accum_us = min(old_accum_us + increment_us, us_upper_bound);
+
+   /* This prevents the short period of low performance when the
+* VRAM usage is low and the driver is in debt or doesn't have
+* enough accumulated us to fill VRAM quickly.
+*
+* The situation can occur in these cases:
+* - a lot of VRAM is freed by userspace
+* - the presence of a big buffer causes a lot of evictions
+*   (solution: split buffers into smaller ones)
+*
+* If 128 MB or 1/8th