From: Praveen Diwakar <[email protected]>

This patch selects the optimum EU/slice/sub-slice configuration based on
the type of load (low, medium, high) given as input.
Based on our readings and experiments, we have a predefined set of
optimum configurations for each platform (CHT, KBL).
i915_gem_context_set_load_type will select the optimum configuration from
the pre-defined optimum configuration table (opt_config).

It also introduces a flag, update_render_config, which can be set by any governor.

v2:
 * Move static optimum_config to device init time.
 * Rename function to appropriate name, fix data types and patch ordering.
 * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)

v3:
 * Add safe guard check in i915_gem_context_set_load_type.
 * Rename struct from optimum_config to i915_sseu_optimum_config to
   avoid namespace clashes.
 * Reduce memcpy usage for space efficiency.
 * Rebase.
 * Improved commit message. (Tvrtko Ursulin)

v4:
 * Move optimum config table to file scope. (Tvrtko Ursulin)

Cc: Kedar J Karanje <[email protected]>
Cc: Yogesh Marathe <[email protected]>
Signed-off-by: Praveen Diwakar <[email protected]>
Signed-off-by: Aravindan Muthukumar <[email protected]>
Signed-off-by: Ankit Navik <[email protected]>
---
 drivers/gpu/drm/i915/i915_drv.h          |  5 ++++
 drivers/gpu/drm/i915/i915_gem_context.c  | 20 ++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.h  | 34 +++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_device_info.c | 47 ++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++++-
 5 files changed, 148 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c8d048..97cb36b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1593,6 +1593,11 @@ struct drm_i915_private {
        struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 
965 */
        int num_fence_regs; /* 8 on pre-965, 16 otherwise */
 
+       /* optimal slice/subslice/EU configuration state */
+       struct i915_sseu_optimum_config *opt_config;
+
+       int predictive_load_enable;
+
        unsigned int fsb_freq, mem_freq, is_ddr3;
        unsigned int skl_preferred_vco_freq;
        unsigned int max_cdclk_freq;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index a5876fe..8f16ef1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,10 +454,30 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 
        trace_i915_context_create(ctx);
        atomic_set(&ctx->req_cnt, 0);
+       ctx->slice_cnt = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask);
+       ctx->subslice_cnt = hweight8(
+                       RUNTIME_INFO(dev_priv)->sseu.subslice_mask[0]);
+       ctx->eu_cnt = RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice;
 
        return ctx;
 }
 
+
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+               enum gem_load_type type)
+{
+       struct drm_i915_private *dev_priv = ctx->i915;
+
+       if (GEM_WARN_ON(type > LOAD_TYPE_LAST))
+               return;
+
+       /* Call opt_config to get correct configuration for eu,slice,subslice */
+       ctx->slice_cnt = dev_priv->opt_config[type].slice;
+       ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
+       ctx->eu_cnt = dev_priv->opt_config[type].eu;
+       ctx->pending_load_type = type;
+}
+
 /**
  * i915_gem_context_create_gvt - create a GVT GEM context
  * @dev: drm device *
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
b/drivers/gpu/drm/i915/i915_gem_context.h
index c940168..0a24d28 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -54,6 +54,19 @@ struct intel_context_ops {
        void (*destroy)(struct intel_context *ce);
 };
 
+enum gem_load_type {
+       LOAD_TYPE_LOW,
+       LOAD_TYPE_MEDIUM,
+       LOAD_TYPE_HIGH,
+       LOAD_TYPE_LAST
+};
+
+struct i915_sseu_optimum_config {
+       u8 slice;
+       u8 subslice;
+       u8 eu;
+};
+
 /*
  * Powergating configuration for a particular (context,engine).
  */
@@ -232,6 +245,25 @@ struct i915_gem_context {
         * go for low/medium/high load configuration of the GPU.
         */
        atomic_t req_cnt;
+
+       /** slice_cnt: used to set the # of slices to be enabled. */
+       u8 slice_cnt;
+
+       /** subslice_cnt: used to set the # of subslices to be enabled. */
+       u8 subslice_cnt;
+
+       /** eu_cnt: used to set the # of eu to be enabled. */
+       u8 eu_cnt;
+
+       /** load_type: The designated load_type (high/medium/low) for a given
+        * number of pending commands in the command queue.
+        */
+       enum gem_load_type load_type;
+
+       /** pending_load_type: The earlier load type that the GPU was configured
+        * for (high/medium/low).
+        */
+       enum gem_load_type pending_load_type;
 };
 
 static inline bool i915_gem_context_is_closed(const struct i915_gem_context 
*ctx)
@@ -375,6 +407,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, 
void *data,
                                    struct drm_file *file_priv);
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
                                       struct drm_file *file);
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+               enum gem_load_type type);
 
 struct i915_gem_context *
 i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
b/drivers/gpu/drm/i915/intel_device_info.c
index 855a507..017a1e2 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -707,6 +707,27 @@ static u32 read_timestamp_frequency(struct 
drm_i915_private *dev_priv)
        return 0;
 }
 
+/* static table of slice/subslice/EU for Cherryview */
+static const struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
+       {1, 1, 4},      /* Low */
+       {1, 1, 6},      /* Medium */
+       {1, 2, 6}       /* High */
+};
+
+/* static table of slice/subslice/EU for KBL GT2 */
+static const struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
+       {1, 3, 2},      /* Low */
+       {1, 3, 4},      /* Medium */
+       {1, 3, 8}       /* High */
+};
+
+/* static table of slice/subslice/EU for KBL GT3 */
+static const struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
+       {2, 3, 4},      /* Low */
+       {2, 3, 6},      /* Medium */
+       {2, 3, 8}       /* High */
+};
+
 /**
  * intel_device_info_runtime_init - initialize runtime info
  * @dev_priv: the i915 device
@@ -728,6 +749,7 @@ void intel_device_info_runtime_init(struct drm_i915_private 
*dev_priv)
        struct intel_device_info *info = mkwrite_device_info(dev_priv);
        struct intel_runtime_info *runtime = RUNTIME_INFO(dev_priv);
        enum pipe pipe;
+       struct i915_sseu_optimum_config *opt_config = NULL;
 
        if (INTEL_GEN(dev_priv) >= 10) {
                for_each_pipe(dev_priv, pipe)
@@ -831,12 +853,30 @@ void intel_device_info_runtime_init(struct 
drm_i915_private *dev_priv)
        /* Initialize slice/subslice/EU info */
        if (IS_HASWELL(dev_priv))
                haswell_sseu_info_init(dev_priv);
-       else if (IS_CHERRYVIEW(dev_priv))
+       else if (IS_CHERRYVIEW(dev_priv)) {
                cherryview_sseu_info_init(dev_priv);
+               opt_config = chv_config;
+               BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);
+       }
        else if (IS_BROADWELL(dev_priv))
                broadwell_sseu_info_init(dev_priv);
-       else if (IS_GEN(dev_priv, 9))
+       else if (IS_GEN(dev_priv, 9)) {
                gen9_sseu_info_init(dev_priv);
+
+               switch (info->gt) {
+               default: /* fall through */
+               case 2:
+                       opt_config = kbl_gt2_config;
+                       BUILD_BUG_ON(ARRAY_SIZE(kbl_gt2_config)
+                                               != LOAD_TYPE_LAST);
+               break;
+               case 3:
+                       opt_config = kbl_gt3_config;
+                       BUILD_BUG_ON(ARRAY_SIZE(kbl_gt3_config)
+                                               != LOAD_TYPE_LAST);
+               break;
+               }
+       }
        else if (IS_GEN(dev_priv, 10))
                gen10_sseu_info_init(dev_priv);
        else if (INTEL_GEN(dev_priv) >= 11)
@@ -847,6 +887,9 @@ void intel_device_info_runtime_init(struct drm_i915_private 
*dev_priv)
                info->ppgtt = INTEL_PPGTT_NONE;
        }
 
+       if (opt_config)
+               dev_priv->opt_config = opt_config;
+
        /* Initialize command stream timestamp frequency */
        runtime->cs_timestamp_frequency_khz = 
read_timestamp_frequency(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d0af37d..397af1e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1282,6 +1282,35 @@ static int __context_pin(struct i915_gem_context *ctx, 
struct i915_vma *vma)
        return i915_vma_pin(vma, 0, 0, flags);
 }
 
+static u32
+get_context_rpcs_config(struct i915_gem_context *ctx)
+{
+       u32 rpcs = 0;
+       struct drm_i915_private *dev_priv = ctx->i915;
+
+       if (INTEL_GEN(dev_priv) < 8)
+               return 0;
+
+       if (RUNTIME_INFO(dev_priv)->sseu.has_slice_pg) {
+               rpcs |= GEN8_RPCS_S_CNT_ENABLE;
+               rpcs |= ctx->slice_cnt << GEN8_RPCS_S_CNT_SHIFT;
+               rpcs |= GEN8_RPCS_ENABLE;
+       }
+
+       if (RUNTIME_INFO(dev_priv)->sseu.has_subslice_pg) {
+               rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
+               rpcs |= ctx->subslice_cnt << GEN8_RPCS_SS_CNT_SHIFT;
+               rpcs |= GEN8_RPCS_ENABLE;
+       }
+
+       if (RUNTIME_INFO(dev_priv)->sseu.has_eu_pg) {
+               rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MIN_SHIFT;
+               rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MAX_SHIFT;
+               rpcs |= GEN8_RPCS_ENABLE;
+       }
+
+       return rpcs;
+}
 static void
 __execlists_update_reg_state(struct intel_engine_cs *engine,
                             struct intel_context *ce)
@@ -1294,9 +1323,20 @@ __execlists_update_reg_state(struct intel_engine_cs 
*engine,
        regs[CTX_RING_TAIL + 1] = ring->tail;
 
        /* RPCS */
-       if (engine->class == RENDER_CLASS)
+       if (engine->class == RENDER_CLASS &&
+                               engine->i915->predictive_load_enable) {
+               u32 rpcs_config = 0;
+               struct i915_gem_context *ctx = ce->gem_context;
+
+               rpcs_config = get_context_rpcs_config(ctx);
+               regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
+               CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+                                               rpcs_config);
+
+       } else if (engine->class == RENDER_CLASS) {
                regs[CTX_R_PWR_CLK_STATE + 1] = gen8_make_rpcs(engine->i915,
                                                               &ce->sseu);
+       }
 }
 
 static struct intel_context *
@@ -1340,6 +1380,9 @@ __execlists_context_pin(struct intel_engine_cs *engine,
 
        __execlists_update_reg_state(engine, ce);
 
+       if (ctx->load_type != ctx->pending_load_type)
+               ctx->load_type = ctx->pending_load_type;
+
        ce->state->obj->pin_global++;
        i915_gem_context_get(ctx);
        return ce;
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to