If some of the contexts submitting workloads to the GPU have been
configured to shutdown slices/subslices, we might loose the NOA
configurations written in the NOA muxes. We need to reprogram them
when we detect a powergating configuration change.

In this change i915/perf is responsible for setting up a reprogramming
batchbuffer which we execute just before the userspace submitted
batchbuffer. We do this while preemption is still disable, only if
needed. The decision to execute this reprogramming batchbuffer is made
when we assign a request to an execlist port.

v2: Only reprogram when detecting configuration changes (Chris/Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h           |   6 ++
 drivers/gpu/drm/i915/i915_perf.c          | 108 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.h       |   6 ++
 drivers/gpu/drm/i915/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c          |  59 +++++++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.c   |   2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h   |   2 +
 7 files changed, 183 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 04e27806e581..07cdbfb4ad7a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1461,6 +1461,12 @@ struct i915_perf_stream {
         * @oa_config: The OA configuration used by the stream.
         */
        struct i915_oa_config *oa_config;
+
+       /**
+        * @noa_reprogram_vma: A batchbuffer reprogramming the NOA muxes, used
+        * after switching powergating configurations.
+        */
+       struct i915_vma *noa_reprogram_vma;
 };
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5b279a82445a..1b9e3d6a53a2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1691,6 +1691,96 @@ static int gen8_emit_oa_config(struct i915_request *rq,
        return 0;
 }
 
+#define MAX_LRI_SIZE (125U)
+
+static u32 noa_reprogram_bb_size(struct i915_oa_config *oa_config)
+{
+       u32 n_lri;
+
+       /* Very unlikely but possible that we have no muxes to configure. */
+       if (!oa_config->mux_regs_len)
+               return 0;
+
+       n_lri = (oa_config->mux_regs_len / MAX_LRI_SIZE) +
+               (oa_config->mux_regs_len % MAX_LRI_SIZE) != 0;
+
+       return n_lri * 4 + oa_config->mux_regs_len * 8 + /* 
MI_LOAD_REGISTER_IMMs */
+               6 * 4 + /* PIPE_CONTROL */
+               4; /* MI_BATCH_BUFFER_END */
+}
+
+static int
+alloc_noa_reprogram_bo(struct i915_perf_stream *stream)
+{
+       struct drm_i915_private *dev_priv = stream->dev_priv;
+       struct i915_oa_config *oa_config = stream->oa_config;
+       struct drm_i915_gem_object *bo;
+       struct i915_vma *vma;
+       u32 buffer_size;
+       u32 *cs;
+       int i, ret, n_loaded_regs;
+
+       buffer_size = ALIGN(noa_reprogram_bb_size(oa_config), PAGE_SIZE);
+       if (buffer_size == 0)
+               return 0;
+
+       bo = i915_gem_object_create(dev_priv, buffer_size);
+       if (IS_ERR(bo)) {
+               DRM_ERROR("Failed to allocate NOA reprogramming buffer\n");
+               return PTR_ERR(bo);
+       }
+
+       cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
+       if (IS_ERR(cs)) {
+               ret = PTR_ERR(cs);
+               goto err_unref_bo;
+       }
+
+       n_loaded_regs = 0;
+       for (i = 0; i < oa_config->mux_regs_len; i++) {
+               if ((n_loaded_regs % MAX_LRI_SIZE) == 0) {
+                       u32 n_lri = min(oa_config->mux_regs_len - n_loaded_regs,
+                                       MAX_LRI_SIZE);
+                       *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+               }
+
+               *cs++ = i915_mmio_reg_offset(oa_config->mux_regs[i].addr);
+               *cs++ = oa_config->mux_regs[i].value;
+               n_loaded_regs++;
+       }
+
+       cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_MMIO_WRITE, 0);
+
+       *cs++ = MI_BATCH_BUFFER_END;
+
+       i915_gem_object_unpin_map(bo);
+
+       ret = i915_gem_object_set_to_gtt_domain(bo, false);
+       if (ret)
+               goto err_unref_bo;
+
+       vma = i915_vma_instance(bo, &dev_priv->ggtt.base, NULL);
+       if (IS_ERR(vma)) {
+               ret = PTR_ERR(vma);
+               goto err_unref_bo;
+       }
+
+       ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
+       if (ret)
+               goto err_unref_vma;
+
+       stream->noa_reprogram_vma = vma;
+
+       return 0;
+
+err_unref_vma:
+       i915_vma_put(vma);
+err_unref_bo:
+       i915_gem_object_put(bo);
+
+       return ret;
+}
+
 static int gen8_switch_to_updated_kernel_context(struct drm_i915_private 
*dev_priv,
                                                 const struct i915_oa_config 
*oa_config)
 {
@@ -2102,6 +2192,12 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
                goto err_config;
        }
 
+       ret = alloc_noa_reprogram_bo(stream);
+       if (ret) {
+               DRM_DEBUG("Enable to alloc NOA reprogramming BO\n");
+               goto err_config;
+       }
+
        /* PRM - observability performance counters:
         *
         *   OACONTROL, performance counter enable, note:
@@ -2136,6 +2232,13 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 
        dev_priv->perf.oa.exclusive_stream = stream;
 
+       ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
+                                                     stream->oa_config);
+       if (ret)
+               goto err_enable;
+
+       stream->ops = &i915_oa_stream_ops;
+
        mutex_unlock(&dev_priv->drm.struct_mutex);
 
        return 0;
@@ -2497,6 +2600,11 @@ static void i915_perf_destroy_locked(struct 
i915_perf_stream *stream)
        if (stream->ctx)
                i915_gem_context_put(stream->ctx);
 
+       if (stream->noa_reprogram_vma) {
+               i915_vma_unpin(stream->noa_reprogram_vma);
+               i915_gem_object_put(stream->noa_reprogram_vma->obj);
+       }
+
        kfree(stream);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_request.h 
b/drivers/gpu/drm/i915/i915_request.h
index eddbd4245cb3..3357ceb4bdb1 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -149,6 +149,12 @@ struct i915_request {
        /** Preallocate space in the ring for the emitting the request */
        u32 reserved_space;
 
+       /*
+        * Pointer in the batchbuffer to where the i915/perf NOA reprogramming
+        * can be inserted just before HW submission.
+        */
+       u32 *perf_prog_start;
+
        /** Batch buffer related to this request if any (used for
         * error state dump only).
         */
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/intel_gpu_commands.h
index 105e2a9e874a..09b0fded8b17 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -146,6 +146,7 @@
 #define   MI_BATCH_GTT             (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8     MI_INSTR(0x31, 1)
 #define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define   MI_BATCH_SECOND_LEVEL      (1<<22)
 
 /*
  * 3D instructions used by the kernel
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1c08bd2be6c3..df4a7bb07eae 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -510,6 +510,38 @@ static bool can_merge_ctx(const struct i915_gem_context 
*prev,
        return true;
 }
 
+static void maybe_enable_noa_repgoram(struct i915_request *rq)
+{
+       struct intel_engine_cs *engine = rq->engine;
+       struct drm_i915_private *dev_priv = engine->i915;
+       struct i915_perf_stream *stream = dev_priv->perf.oa.exclusive_stream;
+       struct intel_context *ce;
+       u32 *cs = rq->perf_prog_start;
+
+       /* Slice/subslice/EU powergating only matters on the RCS. */
+       if (engine->id != RCS)
+               return;
+
+       /* Nothing need to reprogram when perf isn't enabled. */
+       if (!stream)
+               return;
+
+       ce = &rq->ctx->__engine[engine->id];
+
+       /*
+        * If the powergating configuration doesn't change, no need to
+        * reprogram.
+        */
+       if (engine->last_sseu.value == ce->sseu.value)
+               return;
+
+       *cs++ = MI_BATCH_BUFFER_START_GEN8 | MI_BATCH_SECOND_LEVEL;
+       *cs++ = 0;
+       *cs++ = i915_ggtt_offset(stream->noa_reprogram_vma);
+
+       engine->last_sseu = ce->sseu;
+}
+
 static void port_assign(struct execlist_port *port, struct i915_request *rq)
 {
        GEM_BUG_ON(rq == port_request(port));
@@ -517,6 +549,8 @@ static void port_assign(struct execlist_port *port, struct 
i915_request *rq)
        if (port_isset(port))
                i915_request_put(port_request(port));
 
+       maybe_enable_noa_repgoram(rq);
+
        port_set(port, port_pack(i915_request_get(rq), port_count(port)));
 }
 
@@ -1047,6 +1081,13 @@ static void execlists_submission_tasklet(unsigned long 
data)
                            buf[2*head + 1] == 
execlists->preempt_complete_status) {
                                GEM_TRACE("%s preempt-idle\n", engine->name);
 
+                               /*
+                                * Clear out the state of the sseu on the
+                                * engine, as it's not clear what it will be
+                                * after preemption.
+                                */
+                               memset(&engine->last_sseu, 0, 
sizeof(engine->last_sseu));
+
                                execlists_cancel_port_requests(execlists);
                                execlists_unwind_incomplete_requests(execlists);
 
@@ -1937,10 +1978,26 @@ static int gen8_emit_bb_start(struct i915_request *rq,
                rq->ctx->ppgtt->pd_dirty_rings &= 
~intel_engine_flag(rq->engine);
        }
 
-       cs = intel_ring_begin(rq, 6);
+       cs = intel_ring_begin(rq, rq->engine->id == RCS ? 10 : 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);
 
+       if (rq->engine->id == RCS) {
+               /*
+                * Leave some instructions to be written with an
+                * MI_BATCH_BUFFER_START to the i915/perf NOA reprogramming
+                * batchbuffer. We only turn those MI_NOOP into
+                * MI_BATCH_BUFFER_START when we detect a SSEU powergating
+                * configuration change that might affect NOA. This is only
+                * for the RCS.
+                */
+               rq->perf_prog_start = cs;
+               *cs++ = MI_NOOP;
+               *cs++ = MI_NOOP;
+               *cs++ = MI_NOOP;
+               *cs++ = MI_NOOP; /* Aligning to 2 dwords */
+       }
+
        /*
         * WaDisableCtxRestoreArbitration:bdw,chv
         *
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8f19349a6055..cde791234397 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -525,6 +525,8 @@ static int init_ring_common(struct intel_engine_cs *engine)
        if (INTEL_GEN(dev_priv) > 2)
                I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
 
+       memset(&engine->last_sseu, 0, sizeof(engine->last_sseu));
+
 out:
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 010750e8ee44..2f8518e1a49f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -343,6 +343,8 @@ struct intel_engine_cs {
 
        struct drm_i915_gem_object *default_state;
 
+       union i915_gem_sseu last_sseu;
+
        atomic_t irq_count;
        unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to