After much hair pulling, resort to preallocating the ppGTT entries on
init to circumvent the apparent lack of PD invalidate following the
write to PP_DCLV upon switching mm between contexts (and here the same
context after binding new objects). However, the details of that PP_DCLV
invalidate are still unknown, and it appears we need to reload the mm
twice to paper over a timing issue. Worrying.

Fixes: 3dc007fe9b2b ("drm/i915/gtt: Downgrade gen7 (ivb, byt, hsw) back to aliasing-ppgtt")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 21 ++++++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 17 ++++++++-------
 drivers/gpu/drm/i915/i915_pci.c               |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index f25ceccb335e..f977fc27b001 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1366,7 +1366,7 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
        const struct intel_engine_cs * const engine = rq->engine;
        u32 *cs;
 
-       cs = intel_ring_begin(rq, 6);
+       cs = intel_ring_begin(rq, 10);
        if (IS_ERR(cs))
                return PTR_ERR(cs);
 
@@ -1374,6 +1374,12 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
        *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
        *cs++ = PP_DIR_DCLV_2G;
 
+       *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+       *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
+       *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+                                       INTEL_GT_SCRATCH_FIELD_DEFAULT);
+       *cs++ = MI_NOOP;
+
        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
        *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
@@ -1579,6 +1585,7 @@ static int switch_context(struct i915_request *rq)
 {
        struct intel_context *ce = rq->hw_context;
        struct i915_address_space *vm = vm_alias(ce);
+       u32 hw_flags = 0;
        int ret;
 
        GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
@@ -1590,19 +1597,9 @@ static int switch_context(struct i915_request *rq)
        }
 
        if (ce->state) {
-               u32 hw_flags;
-
                GEM_BUG_ON(rq->engine->id != RCS0);
 
-               /*
-                * The kernel context(s) is treated as pure scratch and is not
-                * expected to retain any state (as we sacrifice it during
-                * suspend and on resume it may be corrupted). This is ok,
-                * as nothing actually executes using the kernel context; it
-                * is purely used for flushing user contexts.
-                */
-               hw_flags = 0;
-               if (i915_gem_context_is_kernel(rq->gem_context))
+               if (!rq->engine->default_state)
                        hw_flags = MI_RESTORE_INHIBIT;
 
                ret = mi_set_context(rq, hw_flags);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6239a9adbf14..d76826846997 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1692,7 +1692,6 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
        intel_wakeref_t wakeref;
        u64 from = start;
        unsigned int pde;
-       bool flush = false;
        int ret = 0;
 
        wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
@@ -1717,11 +1716,6 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
                        spin_lock(&pd->lock);
                        if (pd->entry[pde] == &vm->scratch[1]) {
                                pd->entry[pde] = pt;
-                               if (i915_vma_is_bound(ppgtt->vma,
-                                                     I915_VMA_GLOBAL_BIND)) {
-                                       gen6_write_pde(ppgtt, pde, pt);
-                                       flush = true;
-                               }
                        } else {
                                alloc = pt;
                                pt = pd->entry[pde];
@@ -1732,8 +1726,16 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
        }
        spin_unlock(&pd->lock);
 
-       if (flush)
+       if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
+               /* Rewrite them all! Anything less misses an invalidate. */
+               mutex_lock(&vm->mutex);
+               gen6_for_all_pdes(pt, pd, pde)
+                       gen6_write_pde(ppgtt, pde, pt);
+               mutex_unlock(&vm->mutex);
+
+               ioread32(ppgtt->pd_addr + pde - 1);
                gen6_ggtt_invalidate(vm->gt->ggtt);
+       }
 
        goto out;
 
@@ -1994,6 +1996,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 err_pd:
        kfree(ppgtt->base.pd);
 err_free:
+       mutex_destroy(&ppgtt->pin_mutex);
        kfree(ppgtt);
        return ERR_PTR(err);
 }
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index da3e9b5752ac..583e0cd94a6a 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -436,7 +436,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
        .has_rc6 = 1, \
        .has_rc6p = 1, \
        .has_rps = true, \
-       .ppgtt_type = INTEL_PPGTT_ALIASING, \
+       .ppgtt_type = INTEL_PPGTT_FULL, \
        .ppgtt_size = 31, \
        IVB_PIPE_OFFSETS, \
        IVB_CURSOR_OFFSETS, \
-- 
2.24.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to