Flushing the cachelines for an object is slow, can be as much as 100ms
for a large framebuffer. We currently do this under the struct_mutex BKL
on execution or on pageflip. But now with the ability to add fences to
obj->resv for both flips and execbuf (and we naturally wait on the fence
before CPU access), we can move the clflush operation to a workqueue and
signal a fence for completion, thereby doing the work asynchronously and
not blocking the driver or its clients.
Suggested-by: Akash Goel
Signed-off-by: Chris Wilson
Cc: Akash Goel
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_drv.h| 8 +-
drivers/gpu/drm/i915/i915_gem.c| 60 +++--
drivers/gpu/drm/i915/i915_gem_clflush.c| 138 +
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 +-
drivers/gpu/drm/i915/intel_display.c | 57 ++--
6 files changed, 190 insertions(+), 80 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0857e5035f4d..6afd402e440b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
# GEM code
i915-y += i915_cmd_parser.o \
i915_gem_batch_pool.o \
+ i915_gem_clflush.o \
i915_gem_context.o \
i915_gem_dmabuf.o \
i915_gem_evict.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2754d5de76af..c80044267333 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3410,7 +3410,13 @@ static inline u32 i915_reset_count(struct i915_gpu_error
*error)
void i915_gem_reset(struct drm_i915_private *dev_priv);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
-bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
+
+void i915_gem_clflush_init(struct drm_i915_private *i915);
+int i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+ unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
int __must_check i915_gem_init(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev);
void i915_gem_init_swizzling(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cffe60237b6a..524f72774537 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -230,7 +230,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object
*obj)
obj->mm.dirty = false;
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
- i915_gem_clflush_object(obj, false);
+ i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
@@ -1570,6 +1570,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void
*data,
mutex_unlock(&dev->struct_mutex);
+ if (err == 0)
+ err = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (write_domain != 0)
intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
@@ -3236,44 +3241,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64
alignment, u64 flags)
return ret;
}
-bool
-i915_gem_clflush_object(struct drm_i915_gem_object *obj,
- bool force)
-{
- /* If we don't have a page list set up, then we're not pinned
-* to GPU, and we can ignore the cache flush because it'll happen
-* again at bind time.
-*/
- if (!obj->mm.pages)
- return false;
-
- /*
-* Stolen memory is always coherent with the GPU as it is explicitly
-* marked as wc by the system, or the system is cache-coherent.
-*/
- if (obj->stolen || obj->phys_handle)
- return false;
-
- /* If the GPU is snooping the contents of the CPU cache,
-* we do not need to manually clear the CPU cache lines. However,
-* the caches are only snooped when the render cache is
-* flushed/invalidated. As we always have to emit invalidations
-* and flushes when moving into and out of the RENDER domain, correct
-* snooping behaviour occurs naturally as the result of our domain
-* tracking.
-*/
- if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
- obj->cache_dirty = true;
- return false;
- }
-
- trace_i915_gem_object_clflush(obj);
- drm_clflush_sg(obj->mm.pages);
- obj->cache_dirty = false;
-
- return true;
-}
-
/** Flushes the GTT wr