Found this by reading the documentation. I don't have a failing test case that
this patch fixes, but the docs specify that it is needed in addition to the A0 workaround.

Signed-off-by: Ben Widawsky <[email protected]>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |   58 +++++++++++++++++++++++++++++--
 1 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0e99589..163f734 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1233,13 +1233,18 @@ blt_ring_put_irq(struct intel_ring_buffer *ring)
 }
 
 
+/* SNB C0 and D0 (matched here as pdev->revision 9 or 10) need even more workarounds */
+#define NEED_MORE_BLT_WORKAROUND(dev) \
+       ((dev)->pdev->revision == 9 || (dev)->pdev->revision == 10)
+
 /* Workaround for some stepping of SNB,
  * each time when BLT engine ring tail moved,
  * the first command in the ring to be parsed
  * should be MI_BATCH_BUFFER_START
  */
 #define NEED_BLT_WORKAROUND(dev) \
-       (IS_GEN6(dev) && (dev->pdev->revision < 8))
+       ((IS_GEN6(dev) && (dev->pdev->revision < 8)) || \
+       (IS_GEN6(dev) && NEED_MORE_BLT_WORKAROUND(dev)))
 
 static inline struct drm_i915_gem_object *
 to_blt_workaround(struct intel_ring_buffer *ring)
@@ -1286,10 +1291,20 @@ static int blt_ring_begin(struct intel_ring_buffer 
*ring,
                          int num_dwords)
 {
        if (ring->private) {
-               int ret = intel_ring_begin(ring, num_dwords+2);
+               int ret;
+               if (NEED_MORE_BLT_WORKAROUND(ring->dev))
+                       num_dwords += 46;
+               else
+                       num_dwords += 2;
+               ret = intel_ring_begin(ring, num_dwords);
                if (ret)
                        return ret;
 
+               if (NEED_MORE_BLT_WORKAROUND(ring->dev)) {
+                       int i = 0;
+                       for (i = 0; i < 32; i++)
+                               intel_ring_emit(ring, MI_NOOP);
+               }
                intel_ring_emit(ring, MI_BATCH_BUFFER_START);
                intel_ring_emit(ring, to_blt_workaround(ring)->gtt_offset);
 
@@ -1298,6 +1313,25 @@ static int blt_ring_begin(struct intel_ring_buffer *ring,
                return intel_ring_begin(ring, 4);
 }
 
+static void blt_ring_begin2(struct intel_ring_buffer *ring)
+{
+       /* Emits 12 dwords; blt_ring_begin() reserves them (the +46) only on C0/D0. */
+       if (!ring->private || !NEED_MORE_BLT_WORKAROUND(ring->dev))
+               return;
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); /* 1st write to 0x2209c */
+       intel_ring_emit(ring, 0x2209c);
+       intel_ring_emit(ring, 0x20002);
+       intel_ring_emit(ring, MI_FLUSH_DW); /* flush between the register writes */
+       intel_ring_emit(ring, 0x2209c);
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); /* 2nd write to 0x2209c */
+       intel_ring_emit(ring, 0x2209c);
+       intel_ring_emit(ring, 0x20000);
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); /* 3rd write, same value */
+       intel_ring_emit(ring, 0x2209c);
+       intel_ring_emit(ring, 0x20000);
+       intel_ring_emit(ring, MI_NOOP); /* pads the sequence to 12 dwords */
+}
+
 static int blt_ring_flush(struct intel_ring_buffer *ring,
                          u32 invalidate, u32 flush)
 {
@@ -1315,10 +1349,28 @@ static int blt_ring_flush(struct intel_ring_buffer 
*ring,
        intel_ring_emit(ring, 0);
        intel_ring_emit(ring, 0);
        intel_ring_emit(ring, MI_NOOP);
+       blt_ring_begin2(ring);
        intel_ring_advance(ring);
        return 0;
 }
 
+static int
+blt_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
+                             u32 offset, u32 len)
+{
+       int ret;
+       /* Emit a non-secure batch start at offset on the BLT ring; len is not used here. */
+       ret = blt_ring_begin(ring, 4); /* NOTE(review): asks for 4 dwords, 2 emitted before blt_ring_begin2() */
+       if (ret)
+               return ret; /* propagate the blt_ring_begin() failure unchanged */
+       intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+       /* bit0-7 is the length on GEN6+ */
+       intel_ring_emit(ring, offset);
+       blt_ring_begin2(ring); /* returns without emitting when ring->private is unset */
+       intel_ring_advance(ring);
+
+       return 0;
+}
 static void blt_ring_cleanup(struct intel_ring_buffer *ring)
 {
        if (!ring->private)
@@ -1341,7 +1393,7 @@ static const struct intel_ring_buffer gen6_blt_ring = {
        .get_seqno              = ring_get_seqno,
        .irq_get                = blt_ring_get_irq,
        .irq_put                = blt_ring_put_irq,
-       .dispatch_execbuffer    = gen6_ring_dispatch_execbuffer,
+       .dispatch_execbuffer    = blt_ring_dispatch_execbuffer,
        .cleanup                = blt_ring_cleanup,
        .sync_to                = gen6_blt_ring_sync_to,
        .semaphore_register     = {MI_SEMAPHORE_SYNC_BR,
-- 
1.7.6.4

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to