Re: [Intel-gfx] [PATCH v2 3/5] drm/i915: context submission pvmmio optimization
Ping review, thanks very much. BRs, Xiaolin -Original Message- From: Zhang, Xiaolin Sent: Friday, October 19, 2018 3:27 PM To: intel-gfx@lists.freedesktop.org Cc: intel-gvt-...@lists.freedesktop.org; Zhang, Xiaolin ; Zhenyu Wang ; Wang, Zhi A ; Chris Wilson ; Joonas Lahtinen ; He; He, Min ; Jiang; Jiang, Fei ; Gong; Gong, Zhipeng ; Yuan; Yuan, Hang ; Lv, Zhiyuan Subject: [PATCH v2 3/5] drm/i915: context submission pvmmio optimization It is a performance optimization to reduce mmio trap numbers from 4 to 1 during ELSP port writing (context submission). On context submission, the elsp_data[4] values are cached in the shared page, and the last elsp_data[0] port write is trapped to gvt for real context submission. Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization. v0: RFC v1: rebase v2: added pv ops for pv context submission. To maximize code reuse, introduced 2 more ops (submit_ports & preempt_context) instead of 1 op (set_default_submission) in engine structure. pv version of submit_ports and preempt_context implemented.
Cc: Zhenyu Wang Cc: Zhi Wang Cc: Chris Wilson Cc: Joonas Lahtinen Cc: He, Min Cc: Jiang, Fei Cc: Gong, Zhipeng Cc: Yuan, Hang Cc: Zhiyuan Lv Signed-off-by: Xiaolin Zhang --- drivers/gpu/drm/i915/i915_vgpu.c| 2 + drivers/gpu/drm/i915/intel_lrc.c| 88 +++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ 3 files changed, 90 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index cb409d5..9870ea6 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -66,6 +66,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv) BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); + dev_priv->vgpu.pv_caps = PVMMIO_ELSP_SUBMIT; + magic = __raw_i915_read64(dev_priv, vgtif_reg(magic)); if (magic != VGT_MAGIC) return; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 22b57b8..9e6ccf9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -460,6 +460,60 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } +static void execlists_submit_ports_pv(struct intel_engine_cs *engine) { + struct intel_engine_execlists *execlists = >execlists; + struct execlist_port *port = execlists->port; + u32 __iomem *elsp = + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + u32 *elsp_data; + unsigned int n; + u32 descs[4]; + int i = 0; + + /* +* ELSQ note: the submit queue is not cleared after being submitted +* to the HW so we need to make sure we always clean it up. This is +* currently ensured by the fact that we always write the same number +* of elsq entries, keep this in mind before changing the loop below. 
+*/ + for (n = execlists_num_ports(execlists); n--; ) { + struct i915_request *rq; + unsigned int count; + u64 desc; + + rq = port_unpack([n], ); + if (rq) { + GEM_BUG_ON(count > !n); + if (!count++) + execlists_context_schedule_in(rq); + port_set([n], port_pack(rq, count)); + desc = execlists_update_context(rq); + } else { + GEM_BUG_ON(!n); + desc = 0; + } + GEM_BUG_ON(i >= 4); + descs[i] = upper_32_bits(desc); + descs[i + 1] = lower_32_bits(desc); + i += 2; + } + + spin_lock(>i915->vgpu.shared_page_lock); + elsp_data = engine->i915->vgpu.shared_page->elsp_data; + *elsp_data = descs[0]; + *(elsp_data + 1) = descs[1]; + *(elsp_data + 2) = descs[2]; + writel(descs[3], elsp); + spin_unlock(>i915->vgpu.shared_page_lock); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } + static bool ctx_single_port_submission(const struct intel_context *ce) { return (IS_ENABLED(CONFIG_DRM_I915_GVT) && @@ -497,7 +551,6 @@ static void inject_preempt_context(struct intel_engine_cs *engine) GEM_BUG_ON(execlists->preempt_complete_status != upper_32_bits(ce->lrc_desc)); - /* * Switch to our empty preempt context so * the state of the GPU is known (idle). @@ -516,6 +569,27 @@ static void inject_preempt_context(struct intel_engine_cs *engine) execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); } +static void
[Intel-gfx] [PATCH v2 3/5] drm/i915: context submission pvmmio optimization
It is a performance optimization to reduce mmio trap numbers from 4 to 1 during ELSP port writing (context submission). On context submission, the elsp_data[4] values are cached in the shared page, and the last elsp_data[0] port write is trapped to gvt for real context submission. Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization. v0: RFC v1: rebase v2: added pv ops for pv context submission. To maximize code reuse, introduced 2 more ops (submit_ports & preempt_context) instead of 1 op (set_default_submission) in engine structure. pv version of submit_ports and preempt_context implemented. Cc: Zhenyu Wang Cc: Zhi Wang Cc: Chris Wilson Cc: Joonas Lahtinen Cc: He, Min Cc: Jiang, Fei Cc: Gong, Zhipeng Cc: Yuan, Hang Cc: Zhiyuan Lv Signed-off-by: Xiaolin Zhang --- drivers/gpu/drm/i915/i915_vgpu.c| 2 + drivers/gpu/drm/i915/intel_lrc.c| 88 +++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ 3 files changed, 90 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index cb409d5..9870ea6 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -66,6 +66,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv) BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); + dev_priv->vgpu.pv_caps = PVMMIO_ELSP_SUBMIT; + magic = __raw_i915_read64(dev_priv, vgtif_reg(magic)); if (magic != VGT_MAGIC) return; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 22b57b8..9e6ccf9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -460,6 +460,60 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } +static void execlists_submit_ports_pv(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = >execlists; + struct execlist_port *port = execlists->port; + u32 __iomem *elsp = + engine->i915->regs + 
i915_mmio_reg_offset(RING_ELSP(engine)); + u32 *elsp_data; + unsigned int n; + u32 descs[4]; + int i = 0; + + /* +* ELSQ note: the submit queue is not cleared after being submitted +* to the HW so we need to make sure we always clean it up. This is +* currently ensured by the fact that we always write the same number +* of elsq entries, keep this in mind before changing the loop below. +*/ + for (n = execlists_num_ports(execlists); n--; ) { + struct i915_request *rq; + unsigned int count; + u64 desc; + + rq = port_unpack([n], ); + if (rq) { + GEM_BUG_ON(count > !n); + if (!count++) + execlists_context_schedule_in(rq); + port_set([n], port_pack(rq, count)); + desc = execlists_update_context(rq); + } else { + GEM_BUG_ON(!n); + desc = 0; + } + GEM_BUG_ON(i >= 4); + descs[i] = upper_32_bits(desc); + descs[i + 1] = lower_32_bits(desc); + i += 2; + } + + spin_lock(>i915->vgpu.shared_page_lock); + elsp_data = engine->i915->vgpu.shared_page->elsp_data; + *elsp_data = descs[0]; + *(elsp_data + 1) = descs[1]; + *(elsp_data + 2) = descs[2]; + writel(descs[3], elsp); + spin_unlock(>i915->vgpu.shared_page_lock); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); +} + static bool ctx_single_port_submission(const struct intel_context *ce) { return (IS_ENABLED(CONFIG_DRM_I915_GVT) && @@ -497,7 +551,6 @@ static void inject_preempt_context(struct intel_engine_cs *engine) GEM_BUG_ON(execlists->preempt_complete_status != upper_32_bits(ce->lrc_desc)); - /* * Switch to our empty preempt context so * the state of the GPU is known (idle). 
@@ -516,6 +569,27 @@ static void inject_preempt_context(struct intel_engine_cs *engine) execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); } +static void inject_preempt_context_pv(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = >execlists; + struct intel_context *ce = + to_intel_context(engine->i915->preempt_context, engine); + u32 __iomem *elsp = + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + u32 *elsp_data; + + GEM_BUG_ON(execlists->preempt_complete_status != +