Use atomics instead of a spin lock.
v2: Adjust commit message
Signed-off-by: Alex Xie
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h| 5 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 110 +++--
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
3 files changed, 76 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7caf514..21d318b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1588,9 +1588,8 @@ struct amdgpu_device {
/* data for buffer migration throttling */
struct {
- spinlock_t lock;
- s64 last_update_us;
- s64 accum_us; /* accumulated microseconds */
+ atomic64_t last_update_us;
+ atomic64_t accum_us; /* accumulated microseconds */
u32 log2_max_MBps;
} mm_stats;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82131d7..7b6f42e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -225,6 +225,9 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct
amdgpu_device *adev)
s64 time_us, increment_us;
u64 max_bytes;
u64 free_vram, total_vram, used_vram;
+ s64 old_update_us, head_time_us;
+ s64 accum_us;
+ s64 old_accum_us, head_accum_us;
/* Allow a maximum of 200 accumulated ms. This is basically per-IB
* throttling.
@@ -242,47 +245,83 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct
amdgpu_device *adev)
used_vram = atomic64_read(&adev->vram_usage);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
- spin_lock(&adev->mm_stats.lock);
-
/* Increase the amount of accumulated us. */
- time_us = ktime_to_us(ktime_get());
- increment_us = time_us - adev->mm_stats.last_update_us;
- adev->mm_stats.last_update_us = time_us;
- adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
- us_upper_bound);
-
- /* This prevents the short period of low performance when the VRAM
-* usage is low and the driver is in debt or doesn't have enough
-* accumulated us to fill VRAM quickly.
-*
-* The situation can occur in these cases:
-* - a lot of VRAM is freed by userspace
-* - the presence of a big buffer causes a lot of evictions
-* (solution: split buffers into smaller ones)
-*
-* If 128 MB or 1/8th of VRAM is free, start filling it now by setting
-* accum_us to a positive number.
-*/
- if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
- s64 min_us;
-
- /* Be more aggresive on dGPUs. Try to fill a portion of free
-* VRAM now.
-*/
- if (!(adev->flags & AMD_IS_APU))
- min_us = bytes_to_us(adev, free_vram / 4);
+ old_update_us = atomic64_read(&adev->mm_stats.last_update_us);
+ for (;;) {
+ time_us = ktime_to_us(ktime_get());
+ head_time_us = atomic64_cmpxchg(&adev->mm_stats.last_update_us,
+ old_update_us, time_us);
+
+ if (likely(head_time_us == old_update_us))
+ /*
+* No other task modified adev->mm_stats.last_update_us.
+* Update was successful.
+*/
+ break;
else
- min_us = 0; /* Reset accum_us on APUs. */
+ /* Another task modified the value after we read it.
+* A rare contention happened, so let us retry.
+* In most cases, one retry does the job.
+* See atomic64_add_unless() for a similar idea.
+*/
+ old_update_us = head_time_us;
+ }
+ increment_us = time_us - old_update_us;
+
+ old_accum_us = atomic64_read(&adev->mm_stats.accum_us);
+
+ for (;;) {
+ accum_us = min(old_accum_us + increment_us, us_upper_bound);
+
+ /* This prevents the short period of low performance when the
+* VRAM usage is low and the driver is in debt or doesn't have
+* enough accumulated us to fill VRAM quickly.
+*
+* The situation can occur in these cases:
+* - a lot of VRAM is freed by userspace
+* - the presence of a big buffer causes a lot of evictions
+* (solution: split buffers into smaller ones)
+*
+* If 128 MB o