Pre-calculate engine context size based on engine class and device
generation and store it in the engine instance.

v2:
- Squash and get rid of hw_context_size (Chris)

v3:
- Move after MMIO init for probing on Gen7 and 8 (Chris)
- Retained rounding (Tvrtko)
v4:
- Rebase for deferred legacy context allocation

Signed-off-by: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: Paulo Zanoni <paulo.r.zan...@intel.com>
Cc: Rodrigo Vivi <rodrigo.v...@intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Cc: Oscar Mateo <oscar.ma...@intel.com>
Cc: Zhenyu Wang <zhen...@linux.intel.com>
Cc: intel-gvt-...@lists.freedesktop.org
Acked-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gvt/scheduler.c       |  6 +-
 drivers/gpu/drm/i915/i915_drv.c            | 15 +++--
 drivers/gpu/drm/i915/i915_drv.h            |  3 +-
 drivers/gpu/drm/i915/i915_gem_context.c    | 56 ++-----------------
 drivers/gpu/drm/i915/i915_guc_submission.c |  3 +-
 drivers/gpu/drm/i915/i915_reg.h            | 10 ----
 drivers/gpu/drm/i915/intel_engine_cs.c     | 90 +++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.c           | 54 +-----------------
 drivers/gpu/drm/i915/intel_lrc.h           |  2 -
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  7 ++-
 11 files changed, 112 insertions(+), 138 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c 
b/drivers/gpu/drm/i915/gvt/scheduler.c
index bada32b..1256fe2 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload 
*workload)
        gvt_dbg_sched("ring id %d workload lrca %x", ring_id,
                        workload->ctx_desc.lrca);
 
-       context_page_num = intel_lr_context_size(
-                       gvt->dev_priv->engine[ring_id]);
+       context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
 
        context_page_num = context_page_num >> PAGE_SHIFT;
 
@@ -330,8 +329,7 @@ static void update_guest_context(struct intel_vgpu_workload 
*workload)
        gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
                        workload->ctx_desc.lrca);
 
-       context_page_num = intel_lr_context_size(
-                       gvt->dev_priv->engine[ring_id]);
+       context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
 
        context_page_num = context_page_num >> PAGE_SHIFT;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c7d68e7..2d3c4264 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -835,10 +835,6 @@ static int i915_driver_init_early(struct drm_i915_private 
*dev_priv,
        intel_uc_init_early(dev_priv);
        i915_memcpy_init_early(dev_priv);
 
-       ret = intel_engines_init_early(dev_priv);
-       if (ret)
-               return ret;
-
        ret = i915_workqueues_init(dev_priv);
        if (ret < 0)
                goto err_engines;
@@ -948,14 +944,21 @@ static int i915_driver_init_mmio(struct drm_i915_private 
*dev_priv)
 
        ret = i915_mmio_setup(dev_priv);
        if (ret < 0)
-               goto put_bridge;
+               goto err_bridge;
 
        intel_uncore_init(dev_priv);
+
+       ret = intel_engines_init_mmio(dev_priv);
+       if (ret)
+               goto err_uncore;
+
        i915_gem_init_mmio(dev_priv);
 
        return 0;
 
-put_bridge:
+err_uncore:
+       intel_uncore_fini(dev_priv);
+err_bridge:
        pci_dev_put(dev_priv->bridge_dev);
 
        return ret;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d1f7c48..e68edf1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2359,7 +2359,6 @@ struct drm_i915_private {
         */
        struct mutex av_mutex;
 
-       uint32_t hw_context_size;
        struct list_head context_list;
 
        u32 fdi_rx_config;
@@ -3023,7 +3022,7 @@ extern unsigned long i915_gfx_val(struct drm_i915_private 
*dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
-int intel_engines_init_early(struct drm_i915_private *dev_priv);
+int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
 int intel_engines_init(struct drm_i915_private *dev_priv);
 
 /* intel_hotplug.c */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index d46a69d..31a73c3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -92,33 +92,6 @@
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
-static int get_context_size(struct drm_i915_private *dev_priv)
-{
-       int ret;
-       u32 reg;
-
-       switch (INTEL_GEN(dev_priv)) {
-       case 6:
-               reg = I915_READ(CXT_SIZE);
-               ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
-               break;
-       case 7:
-               reg = I915_READ(GEN7_CXT_SIZE);
-               if (IS_HASWELL(dev_priv))
-                       ret = HSW_CXT_TOTAL_SIZE;
-               else
-                       ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
-               break;
-       case 8:
-               ret = GEN8_CXT_TOTAL_SIZE;
-               break;
-       default:
-               BUG();
-       }
-
-       return ret;
-}
-
 void i915_gem_context_free(struct kref *ctx_ref)
 {
        struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -384,21 +357,6 @@ int i915_gem_context_init(struct drm_i915_private 
*dev_priv)
        BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
        ida_init(&dev_priv->context_hw_ida);
 
-       if (i915.enable_execlists) {
-               /* NB: intentionally left blank. We will allocate our own
-                * backing objects as we need them, thank you very much */
-               dev_priv->hw_context_size = 0;
-       } else if (HAS_HW_CONTEXTS(dev_priv)) {
-               dev_priv->hw_context_size =
-                       round_up(get_context_size(dev_priv),
-                                I915_GTT_PAGE_SIZE);
-               if (dev_priv->hw_context_size > (1<<20)) {
-                       DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size 
%d\n",
-                                        dev_priv->hw_context_size);
-                       dev_priv->hw_context_size = 0;
-               }
-       }
-
        ctx = i915_gem_create_context(dev_priv, NULL);
        if (IS_ERR(ctx)) {
                DRM_ERROR("Failed to create default global context (error 
%ld)\n",
@@ -418,8 +376,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
        GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
 
        DRM_DEBUG_DRIVER("%s context support initialized\n",
-                       i915.enable_execlists ? "LR" :
-                       dev_priv->hw_context_size ? "HW" : "fake");
+                        dev_priv->engine[RCS]->context_size ? "logical" :
+                        "fake");
        return 0;
 }
 
@@ -882,11 +840,6 @@ int i915_gem_switch_to_kernel_context(struct 
drm_i915_private *dev_priv)
        return 0;
 }
 
-static bool contexts_enabled(struct drm_device *dev)
-{
-       return i915.enable_execlists || to_i915(dev)->hw_context_size;
-}
-
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
        return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;
@@ -895,12 +848,13 @@ static bool client_is_banned(struct drm_i915_file_private 
*file_priv)
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
                                  struct drm_file *file)
 {
+       struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_context_create *args = data;
        struct drm_i915_file_private *file_priv = file->driver_priv;
        struct i915_gem_context *ctx;
        int ret;
 
-       if (!contexts_enabled(dev))
+       if (!dev_priv->engine[RCS]->context_size)
                return -ENODEV;
 
        if (args->pad != 0)
@@ -918,7 +872,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, 
void *data,
        if (ret)
                return ret;
 
-       ctx = i915_gem_create_context(to_i915(dev), file_priv);
+       ctx = i915_gem_create_context(dev_priv, file_priv);
        mutex_unlock(&dev->struct_mutex);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 4cc97bf..7e85b5a 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1051,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc)
                dev_priv->engine[RCS]->status_page.ggtt_offset;
 
        for_each_engine(engine, dev_priv, id)
-               blob->ads.eng_state_size[engine->guc_id] =
-                       intel_lr_context_size(engine);
+               blob->ads.eng_state_size[engine->guc_id] = engine->context_size;
 
        base = guc_ggtt_offset(vma);
        blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 4c72ada..ee8170c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3370,16 +3370,6 @@ enum skl_disp_power_wells {
 #define GEN7_CXT_VFSTATE_SIZE(ctx_reg) (((ctx_reg) >> 0) & 0x3f)
 #define GEN7_CXT_TOTAL_SIZE(ctx_reg)   (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
                                         GEN7_CXT_VFSTATE_SIZE(ctx_reg))
-/* Haswell does have the CXT_SIZE register however it does not appear to be
- * valid. Now, docs explain in dwords what is in the context object. The full
- * size is 70720 bytes, however, the power context and execlist context will
- * never be saved (power context is stored elsewhere, and execlists don't work
- * on HSW) - so the final size, including the extra state required for the
- * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
- */
-#define HSW_CXT_TOTAL_SIZE             (17 * PAGE_SIZE)
-/* Same as Haswell, but 72064 bytes now. */
-#define GEN8_CXT_TOTAL_SIZE            (18 * PAGE_SIZE)
 
 enum {
        INTEL_ADVANCED_CONTEXT = 0,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index 82a274b..6d3d838 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -26,6 +26,22 @@
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
+/* Haswell does have the CXT_SIZE register however it does not appear to be
+ * valid. Now, docs explain in dwords what is in the context object. The full
+ * size is 70720 bytes, however, the power context and execlist context will
+ * never be saved (power context is stored elsewhere, and execlists don't work
+ * on HSW) - so the final size, including the extra state required for the
+ * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
+ */
+#define HSW_CXT_TOTAL_SIZE             (17 * PAGE_SIZE)
+/* Same as Haswell, but 72064 bytes now. */
+#define GEN8_CXT_TOTAL_SIZE            (18 * PAGE_SIZE)
+
+#define GEN8_LR_CONTEXT_RENDER_SIZE    (20 * PAGE_SIZE)
+#define GEN9_LR_CONTEXT_RENDER_SIZE    (22 * PAGE_SIZE)
+
+#define GEN8_LR_CONTEXT_OTHER_SIZE     ( 2 * PAGE_SIZE)
+
 struct engine_class_info {
        const char *name;
        int (*init_legacy)(struct intel_engine_cs *engine);
@@ -107,6 +123,69 @@ static const struct engine_info intel_engines[] = {
        },
 };
 
+/**
+ * ___intel_engine_context_size() - return the size of the context for an 
engine
+ * @dev_priv: i915 device private
+ * @class: engine class
+ *
+ * Each engine class may require a different amount of space for a context
+ * image.
+ *
+ * Return: size (in bytes) of an engine class specific context image
+ *
+ * Note: this size includes the HWSP, which is part of the context image
+ * in LRC mode, but does not include the "shared data page" used with
+ * GuC submission. The caller should account for this if using the GuC.
+ */
+static u32
+__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
+{
+       u32 cxt_size;
+
+       BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
+
+       switch (class) {
+       case RENDER_CLASS:
+               switch (INTEL_GEN(dev_priv)) {
+               default:
+                       MISSING_CASE(INTEL_GEN(dev_priv));
+               case 9:
+                       return GEN9_LR_CONTEXT_RENDER_SIZE;
+               case 8:
+                       return i915.enable_execlists ?
+                              GEN8_LR_CONTEXT_RENDER_SIZE :
+                              GEN8_CXT_TOTAL_SIZE;
+               case 7:
+                       if (IS_HASWELL(dev_priv))
+                               return HSW_CXT_TOTAL_SIZE;
+
+                       cxt_size = I915_READ(GEN7_CXT_SIZE);
+                       return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
+                                       PAGE_SIZE);
+               case 6:
+                       cxt_size = I915_READ(CXT_SIZE);
+                       return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
+                                       PAGE_SIZE);
+               case 5:
+               case 4:
+               case 3:
+               case 2:
+               /* For the special day when i810 gets merged. */
+               case 1:
+                       return 0;
+               }
+               break;
+       default:
+               MISSING_CASE(class);
+       case VIDEO_DECODE_CLASS:
+       case VIDEO_ENHANCEMENT_CLASS:
+       case COPY_ENGINE_CLASS:
+               if (INTEL_GEN(dev_priv) < 8)
+                       return 0;
+               return GEN8_LR_CONTEXT_OTHER_SIZE;
+       }
+}
+
 static int
 intel_engine_setup(struct drm_i915_private *dev_priv,
                   enum intel_engine_id id)
@@ -135,6 +214,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
        engine->class = info->class;
        engine->instance = info->instance;
 
+       engine->context_size = __intel_engine_context_size(dev_priv,
+                                                          engine->class);
+       if (WARN_ON(engine->context_size > BIT(20)))
+               engine->context_size = 0;
+
        /* Nothing to do here, execute in order of dependencies */
        engine->schedule = NULL;
 
@@ -145,12 +229,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 }
 
 /**
- * intel_engines_init_early() - allocate the Engine Command Streamers
+ * intel_engines_init_mmio() - allocate and prepare the Engine Command 
Streamers
  * @dev_priv: i915 device private
  *
  * Return: non-zero if the initialization failed.
  */
-int intel_engines_init_early(struct drm_i915_private *dev_priv)
+int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 {
        struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
        const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
@@ -200,7 +284,7 @@ int intel_engines_init_early(struct drm_i915_private 
*dev_priv)
 }
 
 /**
- * intel_engines_init() - allocate, populate and init the Engine Command 
Streamers
+ * intel_engines_init() - init the Engine Command Streamers
  * @dev_priv: i915 device private
  *
  * Return: non-zero if the initialization failed.
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5ec064a..0909549 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -138,10 +138,6 @@
 #include "i915_drv.h"
 #include "intel_mocs.h"
 
-#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
-#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
-#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
-
 #define RING_EXECLIST_QFULL            (1 << 0x2)
 #define RING_EXECLIST1_VALID           (1 << 0x3)
 #define RING_EXECLIST0_VALID           (1 << 0x4)
@@ -1918,53 +1914,6 @@ populate_lr_context(struct i915_gem_context *ctx,
        return 0;
 }
 
-/**
- * intel_lr_context_size() - return the size of the context for an engine
- * @engine: which engine to find the context size for
- *
- * Each engine may require a different amount of space for a context image,
- * so when allocating (or copying) an image, this function can be used to
- * find the right size for the specific engine.
- *
- * Return: size (in bytes) of an engine-specific context image
- *
- * Note: this size includes the HWSP, which is part of the context image
- * in LRC mode, but does not include the "shared data page" used with
- * GuC submission. The caller should account for this if using the GuC.
- */
-uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
-{
-       struct drm_i915_private *dev_priv = engine->i915;
-       int ret;
-
-       WARN_ON(INTEL_GEN(dev_priv) < 8);
-
-       switch (engine->class) {
-       case RENDER_CLASS:
-               switch (INTEL_GEN(dev_priv)) {
-               default:
-                       MISSING_CASE(INTEL_GEN(dev_priv));
-               case 9:
-                       ret = GEN9_LR_CONTEXT_RENDER_SIZE;
-                       break;
-               case 8:
-                       ret = GEN8_LR_CONTEXT_RENDER_SIZE;
-                       break;
-               }
-               break;
-
-       default:
-               MISSING_CASE(engine->class);
-       case VIDEO_DECODE_CLASS:
-       case VIDEO_ENHANCEMENT_CLASS:
-       case COPY_ENGINE_CLASS:
-               ret = GEN8_LR_CONTEXT_OTHER_SIZE;
-               break;
-       }
-
-       return ret;
-}
-
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
                                            struct intel_engine_cs *engine)
 {
@@ -1977,8 +1926,7 @@ static int execlists_context_deferred_alloc(struct 
i915_gem_context *ctx,
 
        WARN_ON(ce->state);
 
-       context_size = round_up(intel_lr_context_size(engine),
-                               I915_GTT_PAGE_SIZE);
+       context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
 
        /* One extra page as the sharing data between driver and GuC */
        context_size += PAGE_SIZE * LRC_PPHWSP_PN;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index e8015e7..52b3a1f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
 struct drm_i915_private;
 struct i915_gem_context;
 
-uint32_t intel_lr_context_size(struct intel_engine_cs *engine);
-
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
                                     struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 61f6124..29b5afa 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1444,7 +1444,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
 
-       obj = i915_gem_object_create(i915, i915->hw_context_size);
+       obj = i915_gem_object_create(i915, engine->context_size);
        if (IS_ERR(obj))
                return ERR_CAST(obj);
 
@@ -1487,7 +1487,7 @@ static int intel_ring_context_pin(struct intel_engine_cs 
*engine,
                return 0;
        GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
-       if (engine->id == RCS && !ce->state && engine->i915->hw_context_size) {
+       if (!ce->state && engine->context_size) {
                struct i915_vma *vma;
 
                vma = alloc_context_vma(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2506bbe..02d741e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -196,13 +196,14 @@ struct intel_engine_cs {
        enum intel_engine_id id;
        unsigned int uabi_id;
        unsigned int hw_id;
+       unsigned int guc_id;
 
        u8 class;
        u8 instance;
-
-       unsigned int guc_id;
-       u32             mmio_base;
+       u32 context_size;
+       u32 mmio_base;
        unsigned int irq_shift;
+
        struct intel_ring *buffer;
        struct intel_timeline *timeline;
 
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to