Re: [Intel-gfx] [PATCH 08/15] drm/i915/bdw: implement semaphore signal

2013-12-17 Thread Chris Wilson
On Mon, Dec 16, 2013 at 08:50:44PM -0800, Ben Widawsky wrote:
 +static int gen8_rcs_signal(struct intel_ring_buffer *signaller,
 +unsigned int num_dwords)
 +{
 +#define MBOX_UPDATE_DWORDS 8
 + struct drm_device *dev = signaller-dev;
 + struct drm_i915_private *dev_priv = dev-dev_private;
 + struct intel_ring_buffer *waiter;
 + int i, ret, num_rings;
 +
 + num_rings = hweight_long(INTEL_INFO(dev)-ring_mask);
 + num_dwords = (num_rings-1) * MBOX_UPDATE_DWORDS;
 +#undef MBOX_UPDATE_DWORDS
 +
 + /* XXX: + 4 for the caller */
 + ret = intel_ring_begin(signaller, num_dwords + 4);
 + if (ret)
 + return ret;

You now pass down how many dwords the caller requires, so it should just
be num_dwords += (num_rings - 1) * DWORDS_PER_MBOX; above.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 08/15] drm/i915/bdw: implement semaphore signal

2013-12-16 Thread Ben Widawsky
Semaphore signalling works similarly to previous GENs, with the exception
that the per-ring mailboxes no longer exist. Instead, you must define
your own space somewhere in the GTT.

The comments in the code define the layout I've opted for, which should
be fairly future-proof, i.e. I tried to define offsets in abstract terms
(NUM_RINGS, seqno size, etc.).

NOTE: If one wanted to move this to the HWSP they could. I've decided
one 4k object would be easier to deal with, and provide potential wins
with cache locality, but that's all speculative.

v2: Update the macro to not need the other ring's ring-id (Chris)
Update the comment to use the correct formula (Chris)

Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 drivers/gpu/drm/i915/i915_drv.h |   1 +
 drivers/gpu/drm/i915/i915_reg.h |   5 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 199 +---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  38 +-
 4 files changed, 197 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c638547..4ccb436 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1355,6 +1355,7 @@ typedef struct drm_i915_private {
 
struct pci_dev *bridge_dev;
struct intel_ring_buffer ring[I915_NUM_RINGS];
+   struct drm_i915_gem_object *semaphore_obj;
uint32_t last_seqno, next_seqno;
 
drm_dma_handle_t *status_page_dmah;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ac87ab8..5c3bf66 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -215,7 +215,7 @@
 #define   MI_DISPLAY_FLIP_IVB_SPRITE_B (3  19)
 #define   MI_DISPLAY_FLIP_IVB_PLANE_C  (4  19)
 #define   MI_DISPLAY_FLIP_IVB_SPRITE_C (5  19)
-#define MI_SEMAPHORE_MBOX  MI_INSTR(0x16, 1) /* gen6+ */
+#define MI_SEMAPHORE_MBOX  MI_INSTR(0x16, 1) /* gen6, gen7 */
 #define   MI_SEMAPHORE_GLOBAL_GTT(122)
 #define   MI_SEMAPHORE_UPDATE  (121)
 #define   MI_SEMAPHORE_COMPARE (120)
@@ -240,6 +240,8 @@
 #define   MI_RESTORE_EXT_STATE_EN  (12)
 #define   MI_FORCE_RESTORE (11)
 #define   MI_RESTORE_INHIBIT   (10)
+#define MI_SEMAPHORE_SIGNALMI_INSTR(0x1b, 0) /* GEN8+ */
+#define   MI_SEMAPHORE_TARGET(engine)  ((engine)15)
 #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
 #define   MI_MEM_VIRTUAL   (1  22) /* 965+ only */
 #define MI_STORE_DWORD_INDEX   MI_INSTR(0x21, 1)
@@ -328,6 +330,7 @@
 #define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE(110) /* 
GM45+ only */
 #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE  (19)
 #define   PIPE_CONTROL_NOTIFY  (18)
+#define   PIPE_CONTROL_FLUSH_ENABLE(17) /* gen7+ */
 #define   PIPE_CONTROL_VF_CACHE_INVALIDATE (14)
 #define   PIPE_CONTROL_CONST_CACHE_INVALIDATE  (13)
 #define   PIPE_CONTROL_STATE_CACHE_INVALIDATE  (12)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index db63a5c..f40d4da 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -619,6 +619,13 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 static void render_ring_cleanup(struct intel_ring_buffer *ring)
 {
struct drm_device *dev = ring-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+
+   if (dev_priv-semaphore_obj) {
+   i915_gem_object_unpin(dev_priv-semaphore_obj);
+   drm_gem_object_unreference(dev_priv-semaphore_obj-base);
+   dev_priv-semaphore_obj = NULL;
+   }
 
if (ring-scratch.obj == NULL)
return;
@@ -632,6 +639,86 @@ static void render_ring_cleanup(struct intel_ring_buffer 
*ring)
ring-scratch.obj = NULL;
 }
 
+static int gen8_rcs_signal(struct intel_ring_buffer *signaller,
+  unsigned int num_dwords)
+{
+#define MBOX_UPDATE_DWORDS 8
+   struct drm_device *dev = signaller-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_ring_buffer *waiter;
+   int i, ret, num_rings;
+
+   num_rings = hweight_long(INTEL_INFO(dev)-ring_mask);
+   num_dwords = (num_rings-1) * MBOX_UPDATE_DWORDS;
+#undef MBOX_UPDATE_DWORDS
+
+   /* XXX: + 4 for the caller */
+   ret = intel_ring_begin(signaller, num_dwords + 4);
+   if (ret)
+   return ret;
+
+   for_each_ring(waiter, dev_priv, i) {
+   u64 gtt_offset = signaller-semaphore.signal_ggtt[i];
+   if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
+   continue;
+
+   intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
+   intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
+  PIPE_CONTROL_QW_WRITE |
+