i915: Clearing buffer objects via blitter engine

Tvrtko Ursulin Wed, 01 Jul 2015 07:55:28 -0700

Hi,


On 07/01/2015 10:25 AM, [email protected] wrote:

From: Ankitprasad Sharma <[email protected]>

This patch adds support for clearing buffer objects via blitter
engines. This is particularly useful for clearing out the memory
from stolen region.

Because the CPU cannot access it? I would put that into the commit message, since I think the cover letter does not go into the git history.

v2: Add support for using execlists & PPGTT

v3: Fix issues in legacy ringbuffer submission mode

v4: Rebased to the latest drm-intel-nightly (Ankit)

testcase: igt/gem_stolen


Nitpick: usually it is "Testcase:" and all tags grouped together.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Deepak S <deepak.s at linux.intel.com>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
---
  drivers/gpu/drm/i915/Makefile           |   1 +
  drivers/gpu/drm/i915/i915_drv.h         |   4 +
  drivers/gpu/drm/i915/i915_gem_exec.c    | 201 ++++++++++++++++++++++++++++++++
  drivers/gpu/drm/i915/intel_lrc.c        |   4 +-
  drivers/gpu/drm/i915/intel_lrc.h        |   3 +
  drivers/gpu/drm/i915/intel_ringbuffer.c |   2 +-
  drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
  7 files changed, 213 insertions(+), 3 deletions(-)
  create mode 100644 drivers/gpu/drm/i915/i915_gem_exec.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index de21965..1959314 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -24,6 +24,7 @@ i915-y += i915_cmd_parser.o \
          i915_gem_debug.o \
          i915_gem_dmabuf.o \
          i915_gem_evict.o \
+         i915_gem_exec.o \
          i915_gem_execbuffer.o \
          i915_gem_gtt.o \
          i915_gem.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ea9caf2..d1e151e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3082,6 +3082,10 @@ int __must_check i915_gem_evict_something(struct 
drm_device *dev,
  int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
  int i915_gem_evict_everything(struct drm_device *dev);

+/* i915_gem_exec.c */
+int i915_gem_exec_clear_object(struct drm_i915_gem_object *obj,
+                              struct drm_i915_file_private *file_priv);
+
  /* belongs in i915_gem_gtt.h */
  static inline void i915_gem_chipset_flush(struct drm_device *dev)
  {
diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c 
b/drivers/gpu/drm/i915/i915_gem_exec.c
new file mode 100644
index 0000000..a07fda0
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_exec.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright © 2013 Intel Corporation


Is the year correct?

+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris at chris-wilson.co.uk>


And author?

+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+
+#define GEN8_COLOR_BLT_CMD (2<<29 | 0x50<<22)
+
+#define BPP_8 0
+#define BPP_16 (1<<24)
+#define BPP_32 (1<<25 | 1<<24)
+
+#define ROP_FILL_COPY (0xf0 << 16)
+
+static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
+                                     struct intel_engine_cs *ring,
+                                     struct intel_context *ctx,
+                                     struct drm_i915_gem_request **req)
+{
+       int ret;
+
+       ret = i915_gem_object_sync(obj, ring, req);
+       if (ret)
+               return ret;
+
+       if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
+               if (i915_gem_clflush_object(obj, false))
+                       i915_gem_chipset_flush(obj->base.dev);
+               obj->base.write_domain &= ~I915_GEM_DOMAIN_CPU;
+       }
+       if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
+               wmb();
+               obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+       }


All this could be replaced with i915_gem_object_set_to_gtt_domain, no?

+
+       return i915.enable_execlists ?
+                       logical_ring_invalidate_all_caches(*req) :
+                       intel_ring_invalidate_all_caches(*req);

And this is done on actual submission for you by the lower levels, so you don't need to call it directly.

+}
+
+static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
+                                      struct intel_engine_cs *ring,
+                                      struct i915_address_space *vm,
+                                      struct drm_i915_gem_request *req)
+{
+       i915_gem_request_assign(&obj->last_write_req, req);
+       obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
+       obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
+       i915_vma_move_to_active(i915_gem_obj_to_vma(obj, vm), req);
+       obj->dirty = 1;
+
+       ring->gpu_caches_dirty = true;
+}
+
+int i915_gem_exec_clear_object(struct drm_i915_gem_object *obj,
+                              struct drm_i915_file_private *file_priv)
+{


Function needs some good kerneldoc.

+       struct drm_device *dev = obj->base.dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_engine_cs *ring;
+       struct intel_context *ctx;
+       struct intel_ringbuffer *ringbuf;
+       struct i915_address_space *vm;
+       struct drm_i915_gem_request *req;
+       int ret = 0;
+
+       lockdep_assert_held(&dev->struct_mutex);

I think there was some guidance that lockdep_assert_held is compiled out when lockdep is not in the kernel and that WARN_ON is preferred. In this case that would probably be WARN_ON_ONCE and return an error.

+
+       ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
+       ctx = i915_gem_context_get(file_priv, DEFAULT_CONTEXT_HANDLE);
+       /* Allocate a request for this operation nice and early. */
+       ret = i915_gem_request_alloc(ring, ctx, &req);
+       if (ret)
+               return ret;
+
+       if (ctx->ppgtt)
+               vm = &ctx->ppgtt->base;
+       else
+               vm = &dev_priv->gtt.base;
+
+       if (i915.enable_execlists && !ctx->engine[ring->id].state) {
+               ret = intel_lr_context_deferred_create(ctx, ring);

i915_gem_context_get above and this call are very similar to what i915_gem_validate_context does. It seems to me it would be better to call the latter function here.

+               if (ret)
+                       return ret;

This failure path (and some below) leaks the request. As I understand it, i915_gem_request_cancel is the new API to be called.

+       }
+
+       ringbuf = ctx->engine[ring->id].ringbuf;
+
+       ret = i915_gem_object_pin(obj, vm, PAGE_SIZE, 0);
+       if (ret)
+               return ret;
+
+       if (obj->tiling_mode && INTEL_INFO(dev)->gen <= 3) {
+               ret = i915_gem_object_put_fence(obj);
+               if (ret)
+                       goto unpin;
+       }


Why is this needed?

Could it be called unconditionally and still work?

At least I would recommend a comment explaining it.

+       ret = i915_gem_exec_flush_object(obj, ring, ctx, &req);
+       if (ret)
+               goto unpin;

As I said above, one call to i915_gem_object_set_to_gtt_domain would be enough, I think.

+       if (i915.enable_execlists) {
+               if (dev_priv->info.gen >= 8) {
+                       ret = intel_logical_ring_begin(req, 8);
+                       if (ret)
+                               goto unpin;
+
+                       intel_logical_ring_emit(ringbuf, GEN8_COLOR_BLT_CMD |
+                                                        BLT_WRITE_RGBA |
+                                                        (7-2));
+                       intel_logical_ring_emit(ringbuf, BPP_32 |
+                                                        ROP_FILL_COPY |
+                                                        PAGE_SIZE);
+                       intel_logical_ring_emit(ringbuf, 0);
+                       intel_logical_ring_emit(ringbuf,
+                                               obj->base.size >> PAGE_SHIFT
+                                               << 16 | PAGE_SIZE / 4);
+                       intel_logical_ring_emit(ringbuf,
+                                               i915_gem_obj_offset(obj, vm));
+                       intel_logical_ring_emit(ringbuf, 0);
+                       intel_logical_ring_emit(ringbuf, 0);
+                       intel_logical_ring_emit(ringbuf, MI_NOOP);
+
+                       intel_logical_ring_advance(ringbuf);
+               } else {
+                       DRM_ERROR("Execlists not supported for gen %d\n",
+                                 dev_priv->info.gen);
+                       ret = -EINVAL;

I would put a WARN_ON_ONCE here, or even just return -EINVAL. If the driver is so messed up in general that execlists are enabled < gen8, I think there is no point logging errors about it from here. It would also save you one indentation level.

+                       goto unpin;
+               }
+       } else {
+               if (IS_GEN8(dev)) {
+                       ret = intel_ring_begin(req, 8);
+                       if (ret)
+                               goto unpin;
+
+                       intel_ring_emit(ring, GEN8_COLOR_BLT_CMD |
+                                             BLT_WRITE_RGBA | (7-2));
+                       intel_ring_emit(ring, BPP_32 |
+                                             ROP_FILL_COPY | PAGE_SIZE);
+                       intel_ring_emit(ring, 0);
+                       intel_ring_emit(ring,
+                                       obj->base.size >> PAGE_SHIFT << 16 |
+                                       PAGE_SIZE / 4);
+                       intel_ring_emit(ring, i915_gem_obj_offset(obj, vm));
+                       intel_ring_emit(ring, 0);
+                       intel_ring_emit(ring, 0);
+                       intel_ring_emit(ring, MI_NOOP);

Such a pity that these two emit blocks need to be duplicated, but I guess it is what it is now.

+               } else {
+                       ret = intel_ring_begin(req, 6);
+                       if (ret)
+                               goto unpin;
+
+                       intel_ring_emit(ring, COLOR_BLT_CMD |
+                                             BLT_WRITE_RGBA);
+                       intel_ring_emit(ring, BPP_32 |
+                                             ROP_FILL_COPY | PAGE_SIZE);
+                       intel_ring_emit(ring,
+                                       obj->base.size >> PAGE_SHIFT << 16 |
+                                       PAGE_SIZE);
+                       intel_ring_emit(ring, i915_gem_obj_offset(obj, vm));
+                       intel_ring_emit(ring, 0);
+                       intel_ring_emit(ring, MI_NOOP);
+               }
+
+               __intel_ring_advance(ring);
+       }
+
+       i915_gem_exec_dirty_object(obj, ring, vm, req);


Where is this request actually submitted?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 1/4] drm/i915: Clearing buffer objects via blitter engine

Reply via email to