[Intel-gfx] [PATCH v3] drm/i915: add schedule out notification of preempted but completed request
There is one corner case missing the schedule out notification of the preempted request. If the preempted request has just completed when preemption happens, it will be canceled and won't be resubmitted later, so GVT-g will lose the schedule out notification. Here add the schedule out notification if the preempted request is found to have been completed. v2: - refine description, add completed check and notification in execlists_cancel_port_requests. (Chris) v3: - use ternary conditional, remove local variable. (Tvrtko) Cc: Chris Wilson <ch...@chris-wilson.co.uk> Signed-off-by: Weinan Li <weinan.z...@intel.com> Signed-off-by: Zhenyu Wang <zhen...@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com> --- drivers/gpu/drm/i915/intel_lrc.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 36b376e..b35026b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -672,7 +672,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(!execlists->active); intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); + + execlists_context_status_change(rq, + i915_request_completed(rq) ? + INTEL_CONTEXT_SCHEDULE_OUT : + INTEL_CONTEXT_SCHEDULE_PREEMPTED); + i915_request_put(rq); memset(port, 0, sizeof(*port)); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2] drm/i915: add schedule out notification of preempted but completed request
There is one corner case missing the schedule out notification of the preempted request. If the preempted request has just completed when preemption happens, it will be canceled and won't be resubmitted later, so GVT-g will lose the schedule out notification. Here add the schedule out notification if the preempted request is found to have been completed. v2: - refine description, add completed check and notification in execlists_cancel_port_requests. (Chris) Cc: Chris Wilson <ch...@chris-wilson.co.uk> Signed-off-by: Weinan Li <weinan.z...@intel.com> Signed-off-by: Zhenyu Wang <zhen...@linux.intel.com> --- drivers/gpu/drm/i915/intel_lrc.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e781c91..24a6e68 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -657,10 +657,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); + unsigned int notify; GEM_BUG_ON(!execlists->active); intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); + + notify = INTEL_CONTEXT_SCHEDULE_PREEMPTED; + if (i915_request_completed(rq)) + notify = INTEL_CONTEXT_SCHEDULE_OUT; + execlists_context_status_change(rq, notify); + i915_request_put(rq); memset(port, 0, sizeof(*port)); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: add schedule out notification of completed request during unwind
There is one corner case missing the schedule out notification for GVT-g in __unwind_incomplete_requests, which may cause the vGPU to stop responding. Add the notification when a request is found to have been completed during unwind. Signed-off-by: Weinan Li <weinan.z...@intel.com> Signed-off-by: Zhenyu Wang <zhen...@linux.intel.com> --- drivers/gpu/drm/i915/intel_lrc.c | 63 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index deeedfc..5b937d3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -274,6 +274,35 @@ static void unwind_wa_tail(struct drm_i915_gem_request *rq) assert_ring_tail_valid(rq->ring, rq->tail); } +static inline void +execlists_context_status_change(struct drm_i915_gem_request *rq, + unsigned long status) +{ + /* +* Only used when GVT-g is enabled now. When GVT-g is disabled, +* The compiler should eliminate this function as dead-code. +*/ + if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) + return; + + atomic_notifier_call_chain(&rq->engine->context_status_notifier, + status, rq); +} + +static inline void +execlists_context_schedule_in(struct drm_i915_gem_request *rq) +{ + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + intel_engine_context_in(rq->engine); +} + +static inline void +execlists_context_schedule_out(struct drm_i915_gem_request *rq) +{ + intel_engine_context_out(rq->engine); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); +} + static void __unwind_incomplete_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *rq, *rn; @@ -285,9 +314,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->timeline->requests, link) { - if (i915_gem_request_completed(rq)) + if (i915_gem_request_completed(rq)) { + execlists_context_schedule_out(rq); return; - + } __i915_gem_request_unsubmit(rq); unwind_wa_tail(rq); @@ -316,35 +346,6 @@ static void
__unwind_incomplete_requests(struct intel_engine_cs *engine) spin_unlock_irq(&engine->timeline->lock); } -static inline void -execlists_context_status_change(struct drm_i915_gem_request *rq, - unsigned long status) -{ - /* -* Only used when GVT-g is enabled now. When GVT-g is disabled, -* The compiler should eliminate this function as dead-code. -*/ - if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) - return; - - atomic_notifier_call_chain(&rq->engine->context_status_notifier, - status, rq); -} - -static inline void -execlists_context_schedule_in(struct drm_i915_gem_request *rq) -{ - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); - intel_engine_context_in(rq->engine); -} - -static inline void -execlists_context_schedule_out(struct drm_i915_gem_request *rq) -{ - intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); -} - static void execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 3/4] drm/i915/gvt: refine mocs save restore policy
Save and restore the mocs regs of one VM in GVT-g burning too much CPU utilization. Add LRI command scan to monitor the change of mocs registers, save the state in vreg, and use delta update policy to restore them. It can obviously reduce the MMIO r/w count, and improve the performance of context switch. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 19 +++ drivers/gpu/drm/i915/gvt/mmio_context.c | 33 ++--- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 18c4573..be5c519b 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -825,6 +825,21 @@ static int force_nonpriv_reg_handler(struct parser_exec_state *s, return 0; } +static inline bool is_mocs_mmio(unsigned int offset) +{ + return ((offset >= 0xc800) && (offset <= 0xcff8)) || + ((offset >= 0xb020) && (offset <= 0xb0a0)); +} + +static int mocs_cmd_reg_handler(struct parser_exec_state *s, + unsigned int offset, unsigned int index) +{ + if (!is_mocs_mmio(offset)) + return -EINVAL; + vgpu_vreg(s->vgpu, offset) = cmd_val(s, index + 1); + return 0; +} + static int cmd_reg_handler(struct parser_exec_state *s, unsigned int offset, unsigned int index, char *cmd) { @@ -848,6 +863,10 @@ static int cmd_reg_handler(struct parser_exec_state *s, return 0; } + if (is_mocs_mmio(offset) && + mocs_cmd_reg_handler(s, offset, index)) + return -EINVAL; + if (is_force_nonpriv_mmio(offset) && force_nonpriv_reg_handler(s, offset, index)) return -EPERM; diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 5ad72fc..06ea3d2 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -203,6 +203,8 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, { struct drm_i915_private *dev_priv; i915_reg_t offset, l3_offset; + u32 old_v, new_v; + u32 
regs[] = { [RCS] = 0xc800, [VCS] = 0xc900, @@ -220,16 +222,17 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, for (i = 0; i < 64; i++) { if (pre) - vgpu_vreg(pre, offset) = - I915_READ_FW(offset); + old_v = vgpu_vreg(pre, offset); else - gen9_render_mocs[ring_id][i] = - I915_READ_FW(offset); - + old_v = gen9_render_mocs[ring_id][i] + = I915_READ_FW(offset); if (next) - I915_WRITE_FW(offset, vgpu_vreg(next, offset)); + new_v = vgpu_vreg(next, offset); else - I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]); + new_v = gen9_render_mocs[ring_id][i]; + + if (old_v != new_v) + I915_WRITE_FW(offset, new_v); offset.reg += 4; } @@ -238,17 +241,17 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, l3_offset.reg = 0xb020; for (i = 0; i < 32; i++) { if (pre) - vgpu_vreg(pre, l3_offset) = - I915_READ_FW(l3_offset); + old_v = vgpu_vreg(pre, l3_offset); else - gen9_render_mocs_L3[i] = - I915_READ_FW(l3_offset); + old_v = gen9_render_mocs_L3[i] + = I915_READ_FW(offset); if (next) - I915_WRITE_FW(l3_offset, - vgpu_vreg(next, l3_offset)); + new_v = vgpu_vreg(next, l3_offset); else - I915_WRITE_FW(l3_offset, - gen9_render_mocs_L3[i]); + new_v = gen9_render_mocs_L3[i]; + + if (old_v != new_v) + I915_WRITE_FW(l3_offset, new_v); l3_offset.reg += 4; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 1/4] drm/i915/gvt: refine trace_render_mmio
Refine trace_render_mmio to show the vm id before and after vgpu switch, tag host id as '0', this patch will be used in the future patch for refine mocs switch policy. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/gvt/mmio_context.c | 4 ++-- drivers/gpu/drm/i915/gvt/trace.h| 15 +-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 8a52b56..77d3a0d 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -304,7 +304,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) I915_WRITE_FW(mmio->reg, v); - trace_render_mmio(vgpu->id, "load", + trace_render_mmio(0, vgpu->id, "switch", i915_mmio_reg_offset(mmio->reg), mmio->value, v); } @@ -340,7 +340,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id) I915_WRITE_FW(mmio->reg, v); - trace_render_mmio(vgpu->id, "restore", + trace_render_mmio(vgpu->id, 0, "switch", i915_mmio_reg_offset(mmio->reg), mmio->value, v); } diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 8c15038..7a25115 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -330,13 +330,14 @@ ); TRACE_EVENT(render_mmio, - TP_PROTO(int id, char *action, unsigned int reg, + TP_PROTO(int old_id, int new_id, char *action, unsigned int reg, unsigned int old_val, unsigned int new_val), - TP_ARGS(id, action, reg, new_val, old_val), + TP_ARGS(old_id, new_id, action, reg, new_val, old_val), TP_STRUCT__entry( - __field(int, id) + __field(int, old_id) + __field(int, new_id) __array(char, buf, GVT_TEMP_STR_LEN) __field(unsigned int, reg) __field(unsigned int, old_val) @@ -344,15 +345,17 @@ ), TP_fast_assign( - __entry->id = id; + __entry->old_id = old_id; + __entry->new_id = new_id; snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", action); __entry->reg = reg; __entry->old_val = old_val; 
__entry->new_val = new_val; ), - TP_printk("VM%u %s reg %x, old %08x new %08x\n", - __entry->id, __entry->buf, __entry->reg, + TP_printk("VM%u -> VM%u %s reg %x, old %08x new %08x\n", + __entry->old_id, __entry->new_id, + __entry->buf, __entry->reg, __entry->old_val, __entry->new_val) ); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 2/4] drm/i915/gvt: optimize for vGPU mmio switch
Currently an mmio switch between vGPUs needs to switch to the host first and then to the expected vGPU, which wastes one mmio save/restore. Reading and writing mmio is usually time-consuming, and there are many mocs registers that need to be saved/restored during a vGPU switch. Combining switch_to_host and switch_to_vgpu saves one mmio save/restore, which reduces CPU utilization and improves performance when there are multiple VMs with heavy workloads. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/gvt/mmio_context.c | 196 ++-- 1 file changed, 85 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 77d3a0d..5ad72fc 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -198,9 +198,10 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) gvt_dbg_core("invalidate TLB for ring %d\n", ring_id); } -static void load_mocs(struct intel_vgpu *vgpu, int ring_id) +static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, + int ring_id) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct drm_i915_private *dev_priv; i915_reg_t offset, l3_offset; u32 regs[] = { [RCS] = 0xc800, @@ -211,54 +212,44 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id) }; int i; + dev_priv = pre ? 
pre->gvt->dev_priv : next->gvt->dev_priv; if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { - gen9_render_mocs[ring_id][i] = I915_READ_FW(offset); - I915_WRITE_FW(offset, vgpu_vreg(vgpu, offset)); - offset.reg += 4; - } - - if (ring_id == RCS) { - l3_offset.reg = 0xb020; - for (i = 0; i < 32; i++) { - gen9_render_mocs_L3[i] = I915_READ_FW(l3_offset); - I915_WRITE_FW(l3_offset, vgpu_vreg(vgpu, l3_offset)); - l3_offset.reg += 4; - } - } -} -static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - i915_reg_t offset, l3_offset; - u32 regs[] = { - [RCS] = 0xc800, - [VCS] = 0xc900, - [VCS2] = 0xca00, - [BCS] = 0xcc00, - [VECS] = 0xcb00, - }; - int i; + for (i = 0; i < 64; i++) { + if (pre) + vgpu_vreg(pre, offset) = + I915_READ_FW(offset); + else + gen9_render_mocs[ring_id][i] = + I915_READ_FW(offset); - if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) - return; + if (next) + I915_WRITE_FW(offset, vgpu_vreg(next, offset)); + else + I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]); - offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { - vgpu_vreg(vgpu, offset) = I915_READ_FW(offset); - I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]); offset.reg += 4; } if (ring_id == RCS) { l3_offset.reg = 0xb020; for (i = 0; i < 32; i++) { - vgpu_vreg(vgpu, l3_offset) = I915_READ_FW(l3_offset); - I915_WRITE_FW(l3_offset, gen9_render_mocs_L3[i]); + if (pre) + vgpu_vreg(pre, l3_offset) = + I915_READ_FW(l3_offset); + else + gen9_render_mocs_L3[i] = + I915_READ_FW(l3_offset); + if (next) + I915_WRITE_FW(l3_offset, + vgpu_vreg(next, l3_offset)); + else + I915_WRITE_FW(l3_offset, + gen9_render_mocs_L3[i]); + l3_offset.reg += 4; } } @@ -266,84 +257,77 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) #define CTX_CONTEXT_CONTROL_VAL0x03 -/* Switch ring mmio values (context) from host to a vgpu. 
*/ -static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) +/* Switch ring mmio values (context). */ +static void switch_mmio(struct intel_vgpu *pre, + struct intel_vgpu *next, + int ring_id) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_vgpu_submission *s =
[Intel-gfx] [PATCH v2 4/4] drm/i915/gvt: load host render mocs once in mocs switch
Load host render mocs registers once for delta update of mocs switch, it reduces mmio read times obviously, then brings performance improvement during multi-vms switch. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/gvt/mmio_context.c | 51 +++-- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 06ea3d2..94ac939 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -149,8 +149,41 @@ { /* Terminated */ } }; -static u32 gen9_render_mocs[I915_NUM_ENGINES][64]; -static u32 gen9_render_mocs_L3[32]; +static struct { + bool initialized; + u32 control_table[I915_NUM_ENGINES][64]; + u32 l3cc_table[32]; +} gen9_render_mocs; + +static void load_render_mocs(struct drm_i915_private *dev_priv) +{ + i915_reg_t offset; + u32 regs[] = { + [RCS] = 0xc800, + [VCS] = 0xc900, + [VCS2] = 0xca00, + [BCS] = 0xcc00, + [VECS] = 0xcb00, + }; + int ring_id, i; + + for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { + offset.reg = regs[ring_id]; + for (i = 0; i < 64; i++) { + gen9_render_mocs.control_table[ring_id][i] = + I915_READ_FW(offset); + offset.reg += 4; + } + } + + offset.reg = 0xb020; + for (i = 0; i < 32; i++) { + gen9_render_mocs.l3cc_table[i] = + I915_READ_FW(offset); + offset.reg += 4; + } + gen9_render_mocs.initialized = true; +} static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) { @@ -218,18 +251,19 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - offset.reg = regs[ring_id]; + if (!pre && !gen9_render_mocs.initialized) + load_render_mocs(dev_priv); + offset.reg = regs[ring_id]; for (i = 0; i < 64; i++) { if (pre) old_v = vgpu_vreg(pre, offset); else - old_v = gen9_render_mocs[ring_id][i] - = I915_READ_FW(offset); + old_v = gen9_render_mocs.control_table[ring_id][i]; if (next) new_v = vgpu_vreg(next, 
offset); else - new_v = gen9_render_mocs[ring_id][i]; + new_v = gen9_render_mocs.control_table[ring_id][i]; if (old_v != new_v) I915_WRITE_FW(offset, new_v); @@ -243,12 +277,11 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (pre) old_v = vgpu_vreg(pre, l3_offset); else - old_v = gen9_render_mocs_L3[i] - = I915_READ_FW(offset); + old_v = gen9_render_mocs.l3cc_table[i]; if (next) new_v = vgpu_vreg(next, l3_offset); else - new_v = gen9_render_mocs_L3[i]; + new_v = gen9_render_mocs.l3cc_table[i]; if (old_v != new_v) I915_WRITE_FW(l3_offset, new_v); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 0/4] mmio save restore refine in vgpu switch
Merge switch_mmio_to_vgpu and switch_mmio_to_host, use delta update for mocs save restore, and treat the host mocs values as fixed, since they won't change after initialization. These changes reduce vGPU switch time, which lowers CPU utilization and improves GPU performance in GVT-g with multiple VMs. v2: code rebase Weinan Li (4): drm/i915/gvt: refine trace_render_mmio drm/i915/gvt: optimize for vGPU mmio switch drm/i915/gvt: refine mocs save restore policy drm/i915/gvt: load host render mocs once in mocs switch drivers/gpu/drm/i915/gvt/cmd_parser.c | 19 +++ drivers/gpu/drm/i915/gvt/mmio_context.c | 236 +--- drivers/gpu/drm/i915/gvt/trace.h| 15 +- 3 files changed, 151 insertions(+), 119 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/3] drm/i915/gvt: refine mocs save restore policy
Save and restore the mocs regs of one VM in GVT-g burning too much CPU utilization. Add LRI command scan to monitor the change of mocs registers, save the state in vreg, and use delta update policy to restore them. It can obviously reduce the MMIO r/w count, and improve the performance of context switch. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 19 +++ drivers/gpu/drm/i915/gvt/render.c | 33 ++--- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 18c4573..be5c519b 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -825,6 +825,21 @@ static int force_nonpriv_reg_handler(struct parser_exec_state *s, return 0; } +static inline bool is_mocs_mmio(unsigned int offset) +{ + return ((offset >= 0xc800) && (offset <= 0xcff8)) || + ((offset >= 0xb020) && (offset <= 0xb0a0)); +} + +static int mocs_cmd_reg_handler(struct parser_exec_state *s, + unsigned int offset, unsigned int index) +{ + if (!is_mocs_mmio(offset)) + return -EINVAL; + vgpu_vreg(s->vgpu, offset) = cmd_val(s, index + 1); + return 0; +} + static int cmd_reg_handler(struct parser_exec_state *s, unsigned int offset, unsigned int index, char *cmd) { @@ -848,6 +863,10 @@ static int cmd_reg_handler(struct parser_exec_state *s, return 0; } + if (is_mocs_mmio(offset) && + mocs_cmd_reg_handler(s, offset, index)) + return -EINVAL; + if (is_force_nonpriv_mmio(offset) && force_nonpriv_reg_handler(s, offset, index)) return -EPERM; diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index ec1e60d..724f10d 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -195,6 +195,8 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, { struct drm_i915_private *dev_priv; i915_reg_t offset, l3_offset; + u32 old_v, new_v; + u32 regs[] = { [RCS] = 0xc800, [VCS] 
= 0xc900, @@ -212,16 +214,17 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, for (i = 0; i < 64; i++) { if (pre) - vgpu_vreg(pre, offset) = - I915_READ_FW(offset); + old_v = vgpu_vreg(pre, offset); else - gen9_render_mocs[ring_id][i] = - I915_READ_FW(offset); - + old_v = gen9_render_mocs[ring_id][i] + = I915_READ_FW(offset); if (next) - I915_WRITE_FW(offset, vgpu_vreg(next, offset)); + new_v = vgpu_vreg(next, offset); else - I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]); + new_v = gen9_render_mocs[ring_id][i]; + + if (old_v != new_v) + I915_WRITE_FW(offset, new_v); offset.reg += 4; } @@ -230,17 +233,17 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, l3_offset.reg = 0xb020; for (i = 0; i < 32; i++) { if (pre) - vgpu_vreg(pre, l3_offset) = - I915_READ_FW(l3_offset); + old_v = vgpu_vreg(pre, l3_offset); else - gen9_render_mocs_L3[i] = - I915_READ_FW(l3_offset); + old_v = gen9_render_mocs_L3[i] + = I915_READ_FW(offset); if (next) - I915_WRITE_FW(l3_offset, - vgpu_vreg(next, l3_offset)); + new_v = vgpu_vreg(next, l3_offset); else - I915_WRITE_FW(l3_offset, - gen9_render_mocs_L3[i]); + new_v = gen9_render_mocs_L3[i]; + + if (old_v != new_v) + I915_WRITE_FW(l3_offset, new_v); l3_offset.reg += 4; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/3] drm/i915/gvt: load host render mocs once in mocs switch
Load host render mocs registers once for delta update of mocs switch, it reduces mmio read times obviously, then brings performance improvement during multi-vms switch. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/gvt/render.c | 51 --- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 724f10d..13c3f01 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -141,8 +141,41 @@ struct render_mmio { {RCS, _MMIO(0x20e4), 0x, false}, }; -static u32 gen9_render_mocs[I915_NUM_ENGINES][64]; -static u32 gen9_render_mocs_L3[32]; +static struct { + bool initialized; + u32 control_table[I915_NUM_ENGINES][64]; + u32 l3cc_table[32]; +} gen9_render_mocs; + +static int load_render_mocs(struct drm_i915_private *dev_priv) +{ + i915_reg_t offset; + u32 regs[] = { + [RCS] = 0xc800, + [VCS] = 0xc900, + [VCS2] = 0xca00, + [BCS] = 0xcc00, + [VECS] = 0xcb00, + }; + int ring_id, i; + + for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { + offset.reg = regs[ring_id]; + for (i = 0; i < 64; i++) { + gen9_render_mocs.control_table[ring_id][i] = + I915_READ_FW(offset); + offset.reg += 4; + } + } + + offset.reg = 0xb020; + for (i = 0; i < 32; i++) { + gen9_render_mocs.l3cc_table[i] = + I915_READ_FW(offset); + offset.reg += 4; + } + gen9_render_mocs.initialized = true; +} static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) { @@ -210,18 +243,19 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - offset.reg = regs[ring_id]; + if (!pre && !gen9_render_mocs.initialized) + load_render_mocs(dev_priv); + offset.reg = regs[ring_id]; for (i = 0; i < 64; i++) { if (pre) old_v = vgpu_vreg(pre, offset); else - old_v = gen9_render_mocs[ring_id][i] - = I915_READ_FW(offset); + old_v = gen9_render_mocs.control_table[ring_id][i]; if (next) new_v = vgpu_vreg(next, 
offset); else - new_v = gen9_render_mocs[ring_id][i]; + new_v = gen9_render_mocs.control_table[ring_id][i]; if (old_v != new_v) I915_WRITE_FW(offset, new_v); @@ -235,12 +269,11 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (pre) old_v = vgpu_vreg(pre, l3_offset); else - old_v = gen9_render_mocs_L3[i] - = I915_READ_FW(offset); + old_v = gen9_render_mocs.l3cc_table[i]; if (next) new_v = vgpu_vreg(next, l3_offset); else - new_v = gen9_render_mocs_L3[i]; + new_v = gen9_render_mocs.l3cc_table[i]; if (old_v != new_v) I915_WRITE_FW(l3_offset, new_v); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 0/3] mmio save restore refine in vgpu switch
Merge switch_mmio_to_vgpu and switch_mmio_to_host, use delta update for mocs save restore, and treat the host mocs values as fixed, since they won't change after initialization. These changes reduce vGPU switch time, which lowers CPU utilization and improves GPU performance in GVT-g with multiple VMs. Weinan Li (3): drm/i915/gvt: optimize for vGPU mmio switch drm/i915/gvt: refine mocs save restore policy drm/i915/gvt: load host render mocs once in mocs switch drivers/gpu/drm/i915/gvt/cmd_parser.c | 19 +++ drivers/gpu/drm/i915/gvt/render.c | 252 +- drivers/gpu/drm/i915/gvt/trace.h | 15 +- 3 files changed, 152 insertions(+), 134 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/3] drm/i915/gvt: optimize for vGPU mmio switch
Currently an mmio switch between vGPUs needs to switch to the host first and then to the expected vGPU, which wastes one mmio save/restore. Reading and writing mmio is usually time-consuming, and there are many mocs registers that need to be saved/restored during a vGPU switch. Combining switch_to_host and switch_to_vgpu saves one mmio save/restore, which reduces CPU utilization and improves performance when there are multiple VMs with heavy workloads. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/gvt/render.c | 212 -- drivers/gpu/drm/i915/gvt/trace.h | 15 +-- 2 files changed, 95 insertions(+), 132 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index dac12c2..ec1e60d 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -190,9 +190,10 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) gvt_dbg_core("invalidate TLB for ring %d\n", ring_id); } -static void load_mocs(struct intel_vgpu *vgpu, int ring_id) +static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, + int ring_id) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct drm_i915_private *dev_priv; i915_reg_t offset, l3_offset; u32 regs[] = { [RCS] = 0xc800, @@ -203,54 +204,44 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id) }; int i; + dev_priv = pre ? 
pre->gvt->dev_priv : next->gvt->dev_priv; if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { - gen9_render_mocs[ring_id][i] = I915_READ_FW(offset); - I915_WRITE_FW(offset, vgpu_vreg(vgpu, offset)); - offset.reg += 4; - } - - if (ring_id == RCS) { - l3_offset.reg = 0xb020; - for (i = 0; i < 32; i++) { - gen9_render_mocs_L3[i] = I915_READ_FW(l3_offset); - I915_WRITE_FW(l3_offset, vgpu_vreg(vgpu, l3_offset)); - l3_offset.reg += 4; - } - } -} -static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - i915_reg_t offset, l3_offset; - u32 regs[] = { - [RCS] = 0xc800, - [VCS] = 0xc900, - [VCS2] = 0xca00, - [BCS] = 0xcc00, - [VECS] = 0xcb00, - }; - int i; + for (i = 0; i < 64; i++) { + if (pre) + vgpu_vreg(pre, offset) = + I915_READ_FW(offset); + else + gen9_render_mocs[ring_id][i] = + I915_READ_FW(offset); - if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) - return; + if (next) + I915_WRITE_FW(offset, vgpu_vreg(next, offset)); + else + I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]); - offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { - vgpu_vreg(vgpu, offset) = I915_READ_FW(offset); - I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]); offset.reg += 4; } if (ring_id == RCS) { l3_offset.reg = 0xb020; for (i = 0; i < 32; i++) { - vgpu_vreg(vgpu, l3_offset) = I915_READ_FW(l3_offset); - I915_WRITE_FW(l3_offset, gen9_render_mocs_L3[i]); + if (pre) + vgpu_vreg(pre, l3_offset) = + I915_READ_FW(l3_offset); + else + gen9_render_mocs_L3[i] = + I915_READ_FW(l3_offset); + if (next) + I915_WRITE_FW(l3_offset, + vgpu_vreg(next, l3_offset)); + else + I915_WRITE_FW(l3_offset, + gen9_render_mocs_L3[i]); + l3_offset.reg += 4; } } @@ -258,78 +249,25 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) #define CTX_CONTEXT_CONTROL_VAL0x03 -/* Switch ring mmio values (context) from host to a vgpu. 
*/ -static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_vgpu_submission *s = >submission; - u32 *reg_state = s->shadow_ctx->engine[ring_id].lrc_reg_state; - u32 ctx_ctrl = reg_state[CTX_CONTEXT
[Intel-gfx] [PATCH] drm/i915/gvt: remove skl_misc_ctl_write handler
Different settings of the compressed data hash mode between VMs and the host may cause GPU issues. Commit: 1999f108c ("drm/i915/gvt: Disable compression workaround for Gen9") disabled the guest's compression workaround in the GVT host to align with the host. Commit: 93564044f ("drm/i915: Switch over to the LLC/eLLC hotspot avoidance hash mode for CCS") added the compression workaround, so we can now remove the skl_misc_ctl_write handler. A better solution would be to always keep the same settings as the host and bypass write requests from VMs, but that needs to fetch data from the host's "Context". Cc: Zhi Wang <zhi.a.w...@intel.com> Signed-off-by: Weinan Li <weinan.z...@intel.com> Signed-off-by: Xiong Zhang <xiong.y.zh...@intel.com> --- drivers/gpu/drm/i915/gvt/handlers.c | 45 + 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 880448d..94fc0421 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1420,40 +1420,6 @@ static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, return intel_vgpu_default_mmio_write(vgpu, offset, &v, bytes); } -static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, - void *p_data, unsigned int bytes) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - u32 v = *(u32 *)p_data; - - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) - return intel_vgpu_default_mmio_write(vgpu, - offset, p_data, bytes); - - switch (offset) { - case 0x4ddc: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 31); - break; - case 0x42080: - /* bypass WaCompressedResourceDisplayNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 15); - break; - case 0xe194: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 8); - break; - case 0x7014: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 
<< 13); - break; - default: - return -EINVAL; - } - - return 0; -} - static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1740,8 +1706,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, -skl_misc_ctl_write); + MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, +NULL, NULL); MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2633,8 +2599,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0x6e570, D_BDW_PLUS); MMIO_D(0x65f10, D_BDW_PLUS); - MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, -skl_misc_ctl_write); + MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2684,8 +2649,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write); - MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write); + MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, NULL); + MMIO_DH(0x42080, D_SKL_PLUS, NULL, NULL); MMIO_D(0x45504, D_SKL_PLUS); MMIO_D(0x45520, D_SKL_PLUS); MMIO_D(0x46000, D_SKL_PLUS); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v6] drm/i915: enable to read CSB and CSB write pointer from HWSP in GVT-g VM
Let GVT-g VM read the CSB and CSB write pointer from virtual HWSP, not all the host support this feature, need to check the BIT(3) of caps in PVINFO. v3 : Remove unnecessary comments. v4 : Separate VM enable patch with GVT-g implementation patch due to code dependency. v5 : Use inline for GVT virtual HWSP caps check function. v6 : Comments refine. Signed-off-by: Weinan Li <weinan.z...@intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com> --- drivers/gpu/drm/i915/i915_pvinfo.h | 1 + drivers/gpu/drm/i915/i915_vgpu.h | 6 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 9 + drivers/gpu/drm/i915/intel_lrc.c | 1 - 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h index 0679a58..195203f 100644 --- a/drivers/gpu/drm/i915/i915_pvinfo.h +++ b/drivers/gpu/drm/i915/i915_pvinfo.h @@ -53,6 +53,7 @@ enum vgt_g2v_type { * VGT capabilities type */ #define VGT_CAPS_FULL_48BIT_PPGTT BIT(2) +#define VGT_CAPS_HWSP_EMULATIONBIT(3) struct vgt_if { u64 magic; /* VGT_MAGIC */ diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index b72bd29..bb83384 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -30,6 +30,12 @@ bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv); +static inline bool +intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv) +{ + return dev_priv->vgpu.caps & VGT_CAPS_HWSP_EMULATION; +} + int intel_vgt_balloon(struct drm_i915_private *dev_priv); void intel_vgt_deballoon(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a59b2a3..83e696f 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -25,6 +25,7 @@ #include #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ 
-386,10 +387,6 @@ static void intel_engine_init_timeline(struct intel_engine_cs *engine) static bool csb_force_mmio(struct drm_i915_private *i915) { - /* GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915)) - return true; - /* * IOMMU adds unpredictable latency causing the CSB write (from the * GPU into the HWSP) to only be visible some time after the interrupt @@ -398,6 +395,10 @@ static bool csb_force_mmio(struct drm_i915_private *i915) if (intel_vtd_active()) return true; + /* Older GVT emulation depends upon intercepting CSB mmio */ + if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) + return true; + return false; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fbfcf88..766552f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -793,7 +793,6 @@ static void intel_lrc_irq_handler(unsigned long data) >status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int head, tail; - /* However GVT emulation depends upon intercepting CSB mmio */ if (unlikely(execlists->csb_use_mmio)) { buf = (u32 * __force) (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v5] drm/i915: enable to read CSB and CSB write pointer from HWSP in GVT-g VM
Let GVT-g VM read the CSB and CSB write pointer from virtual HWSP, not all the host support this feature, need to check the BIT(3) of caps in PVINFO. v3 : Remove unnecessary comments. v4 : Separate VM enable patch with GVT-g implementation patch due to code dependency. v5 : Use inline for GVT virtual HWSP caps check function. Signed-off-by: Weinan Li <weinan.z...@intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com> --- drivers/gpu/drm/i915/i915_pvinfo.h | 1 + drivers/gpu/drm/i915/i915_vgpu.h | 6 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 8 drivers/gpu/drm/i915/intel_lrc.c | 1 - 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h index 0679a58..195203f 100644 --- a/drivers/gpu/drm/i915/i915_pvinfo.h +++ b/drivers/gpu/drm/i915/i915_pvinfo.h @@ -53,6 +53,7 @@ enum vgt_g2v_type { * VGT capabilities type */ #define VGT_CAPS_FULL_48BIT_PPGTT BIT(2) +#define VGT_CAPS_HWSP_EMULATIONBIT(3) struct vgt_if { u64 magic; /* VGT_MAGIC */ diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index b72bd29..bb83384 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -30,6 +30,12 @@ bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv); +static inline bool +intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv) +{ + return dev_priv->vgpu.caps & VGT_CAPS_HWSP_EMULATION; +} + int intel_vgt_balloon(struct drm_i915_private *dev_priv); void intel_vgt_deballoon(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a59b2a3..457ebe0 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -25,6 +25,7 @@ #include #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -386,10 +387,6 @@ static void 
intel_engine_init_timeline(struct intel_engine_cs *engine) static bool csb_force_mmio(struct drm_i915_private *i915) { - /* GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915)) - return true; - /* * IOMMU adds unpredictable latency causing the CSB write (from the * GPU into the HWSP) to only be visible some time after the interrupt @@ -398,6 +395,9 @@ static bool csb_force_mmio(struct drm_i915_private *i915) if (intel_vtd_active()) return true; + if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) + return true; + return false; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fbfcf88..766552f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -793,7 +793,6 @@ static void intel_lrc_irq_handler(unsigned long data) >status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int head, tail; - /* However GVT emulation depends upon intercepting CSB mmio */ if (unlikely(execlists->csb_use_mmio)) { buf = (u32 * __force) (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4] drm/i915: enable to read CSB and CSB write pointer from HWSP in GVT-g VM
Let GVT-g VM read the CSB and CSB write pointer from virtual HWSP, not all the host support this feature, need to check the BIT(3) of caps in PVINFO. v3 : Remove unnecessary comments. v4 : Separate VM enable patch with GVT-g implementation patch due to code dependency Signed-off-by: Weinan Li <weinan.z...@intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com> --- drivers/gpu/drm/i915/i915_pvinfo.h | 1 + drivers/gpu/drm/i915/i915_vgpu.c | 5 + drivers/gpu/drm/i915/i915_vgpu.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 8 drivers/gpu/drm/i915/intel_lrc.c | 1 - 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h index 0679a58..195203f 100644 --- a/drivers/gpu/drm/i915/i915_pvinfo.h +++ b/drivers/gpu/drm/i915/i915_pvinfo.h @@ -53,6 +53,7 @@ enum vgt_g2v_type { * VGT capabilities type */ #define VGT_CAPS_FULL_48BIT_PPGTT BIT(2) +#define VGT_CAPS_HWSP_EMULATIONBIT(3) struct vgt_if { u64 magic; /* VGT_MAGIC */ diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 5fe9f3f..6f713c5 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -86,6 +86,11 @@ bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv) return dev_priv->vgpu.caps & VGT_CAPS_FULL_48BIT_PPGTT; } +bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv) +{ + return dev_priv->vgpu.caps & VGT_CAPS_HWSP_EMULATION; +} + struct _balloon_info_ { /* * There are up to 2 regions per mappable/unmappable graphic diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index b72bd29..cec0ec1 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -29,6 +29,7 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv); bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv); +bool intel_vgpu_has_hwsp_emulation(struct 
drm_i915_private *dev_priv); int intel_vgt_balloon(struct drm_i915_private *dev_priv); void intel_vgt_deballoon(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a59b2a3..457ebe0 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -25,6 +25,7 @@ #include #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -386,10 +387,6 @@ static void intel_engine_init_timeline(struct intel_engine_cs *engine) static bool csb_force_mmio(struct drm_i915_private *i915) { - /* GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915)) - return true; - /* * IOMMU adds unpredictable latency causing the CSB write (from the * GPU into the HWSP) to only be visible some time after the interrupt @@ -398,6 +395,9 @@ static bool csb_force_mmio(struct drm_i915_private *i915) if (intel_vtd_active()) return true; + if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) + return true; + return false; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fbfcf88..766552f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -793,7 +793,6 @@ static void intel_lrc_irq_handler(unsigned long data) >status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int head, tail; - /* However GVT emulation depends upon intercepting CSB mmio */ if (unlikely(execlists->csb_use_mmio)) { buf = (u32 * __force) (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 0/2] enable virtual HWSP in GVT-g
v2 : clean merge conflict v3 : remove unnecessary comments add address audit in HWSP address update Weinan Li (2): drm/i915/gvt: update CSB and CSB write pointer in virtual HWSP drm/i915: enable to read CSB and CSB write pointer from HWSP in GVT-g VM drivers/gpu/drm/i915/gvt/execlist.c| 17 + drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c| 45 +- drivers/gpu/drm/i915/gvt/vgpu.c| 8 ++ drivers/gpu/drm/i915/i915_pvinfo.h | 1 + drivers/gpu/drm/i915/i915_vgpu.c | 5 drivers/gpu/drm/i915/i915_vgpu.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 8 +++--- drivers/gpu/drm/i915/intel_lrc.c | 1 - 9 files changed, 81 insertions(+), 6 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 1/2] drm/i915/gvt: update CSB and CSB write pointer in virtual HWSP
The engine provides a mirror of the CSB and CSB write pointer in the HWSP. Read these status from virtual HWSP in VM can reduce CPU utilization while applications have much more short GPU workloads. Here we update the corresponding data in virtual HWSP as it in virtual MMIO. Before read these status from HWSP in GVT-g VM, please ensure the host support it by checking the BIT(3) of caps in PVINFO. Virtual HWSP only support GEN8+ platform, since the HWSP MMIO may change follow the platform update, please add the corresponding MMIO emulation when enable new platforms in GVT-g. v3 : Add address audit in HWSP address update. Signed-off-by: Weinan Li <weinan.z...@intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gvt/execlist.c | 17 ++ drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c | 45 - drivers/gpu/drm/i915/gvt/vgpu.c | 8 +++ drivers/gpu/drm/i915/i915_pvinfo.h | 1 + 5 files changed, 71 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 5c966ed..7bc7286 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -133,6 +133,8 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist, struct execlist_context_status_pointer_format ctx_status_ptr; u32 write_pointer; u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset; + unsigned long hwsp_gpa; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id, _EL_OFFSET_STATUS_PTR); @@ -158,6 +160,21 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist, ctx_status_ptr.write_ptr = write_pointer; vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; + /* Update the CSB and CSB write pointer in HWSP */ + if (INTEL_INFO(dev_priv)->gen >= 8) { + hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, +vgpu->hws_pga[ring_id]); + if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) { + 
intel_gvt_hypervisor_write_gpa(vgpu, + hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 + + write_pointer * 8, + status, 8); + intel_gvt_hypervisor_write_gpa(vgpu, + hwsp_gpa + + intel_hws_csb_write_index(dev_priv) * 4, + _pointer, 4); + } + } gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n", vgpu->id, write_pointer, offset, status->ldw, status->udw); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index f08d194..27e8186 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -189,6 +189,7 @@ struct intel_vgpu { struct intel_vgpu_opregion opregion; struct intel_vgpu_display display; struct intel_vgpu_submission submission; + u32 hws_pga[I915_NUM_ENGINES]; struct dentry *debugfs; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 8ef7db3..6313f0e 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1380,6 +1380,49 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, return intel_vgpu_default_mmio_write(vgpu, offset, , bytes); } +static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, + void *p_data, unsigned int bytes) +{ + u32 value = *(u32 *)p_data; + + if (!intel_gvt_ggtt_validate_range(vgpu, value, GTT_PAGE_SIZE)) { + gvt_vgpu_err("VM(%d) write invalid HWSP address, reg:0x%x, value:0x%x\n", + vgpu->id, offset, value); + return -EINVAL; + } + /* +* Need to emulate all the HWSP register write to ensure host can +* update the VM CSB status correctly. Here listed registers can +* support BDW, SKL or other platforms with same HWSP registers. 
+*/ + switch (offset) { + case 0x2080: + vgpu->hws_pga[RCS] = value; + break; + case 0x12080: + vgpu->hws_pga[VCS] = value; + break; + case 0x1c080: + vgpu->hws_pga[VCS2] = value; + break; + case 0x1a080: + vgpu->hws_pga[VECS] = value; + break; + case 0x22080: + vgpu->hws_pga[BCS] = value; + break; + default: + gvt_vgpu_err("VM(%d) access unknown hardware status page register:0x
[Intel-gfx] [PATCH v3 2/2] drm/i915: enable to read CSB and CSB write pointer from HWSP in GVT-g VM
Let GVT-g VM read the CSB and CSB write pointer from virtual HWSP, not all the host support this feature, need to check the BIT(3) of caps in PVINFO. v3 : Remove unnecessary comments. Signed-off-by: Weinan Li <weinan.z...@intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com> --- drivers/gpu/drm/i915/i915_vgpu.c | 5 + drivers/gpu/drm/i915/i915_vgpu.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 8 drivers/gpu/drm/i915/intel_lrc.c | 1 - 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 5fe9f3f..6f713c5 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -86,6 +86,11 @@ bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv) return dev_priv->vgpu.caps & VGT_CAPS_FULL_48BIT_PPGTT; } +bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv) +{ + return dev_priv->vgpu.caps & VGT_CAPS_HWSP_EMULATION; +} + struct _balloon_info_ { /* * There are up to 2 regions per mappable/unmappable graphic diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index b72bd29..cec0ec1 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -29,6 +29,7 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv); bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv); +bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv); int intel_vgt_balloon(struct drm_i915_private *dev_priv); void intel_vgt_deballoon(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 807a7aa..93478c2 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -23,6 +23,7 @@ */ #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -384,10 +385,6 @@ static void 
intel_engine_init_timeline(struct intel_engine_cs *engine) static bool csb_force_mmio(struct drm_i915_private *i915) { - /* GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915)) - return true; - /* * IOMMU adds unpredictable latency causing the CSB write (from the * GPU into the HWSP) to only be visible some time after the interrupt @@ -396,6 +393,9 @@ static bool csb_force_mmio(struct drm_i915_private *i915) if (intel_vtd_active()) return true; + if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) + return true; + return false; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 721432d..3c91971 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -793,7 +793,6 @@ static void intel_lrc_irq_handler(unsigned long data) >status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int head, tail; - /* However GVT emulation depends upon intercepting CSB mmio */ if (unlikely(execlists->csb_use_mmio)) { buf = (u32 * __force) (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 2/2] drm/i915: enable to read CSB and CSB write pointer from HWSP in GVT-g VM
Let GVT-g VM read the CSB and CSB write pointer from virtual HWSP, not all the host support this feature, need to check the BIT(3) of caps in PVINFO. Signed-off-by: Weinan Li <weinan.z...@intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_vgpu.c | 5 + drivers/gpu/drm/i915/i915_vgpu.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 11 +++ drivers/gpu/drm/i915/intel_lrc.c | 7 ++- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 5fe9f3f..6f713c5 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -86,6 +86,11 @@ bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv) return dev_priv->vgpu.caps & VGT_CAPS_FULL_48BIT_PPGTT; } +bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv) +{ + return dev_priv->vgpu.caps & VGT_CAPS_HWSP_EMULATION; +} + struct _balloon_info_ { /* * There are up to 2 regions per mappable/unmappable graphic diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index b72bd29..cec0ec1 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -29,6 +29,7 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv); bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv); +bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv); int intel_vgt_balloon(struct drm_i915_private *dev_priv); void intel_vgt_deballoon(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a28e2a8..58945ef 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -23,6 +23,7 @@ */ #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -384,10 +385,6 @@ static void intel_engine_init_timeline(struct intel_engine_cs *engine) static bool 
csb_force_mmio(struct drm_i915_private *i915) { - /* GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915)) - return true; - /* * IOMMU adds unpredictable latency causing the CSB write (from the * GPU into the HWSP) to only be visible some time after the interrupt @@ -396,6 +393,12 @@ static bool csb_force_mmio(struct drm_i915_private *i915) if (intel_vtd_active()) return true; + /* GVT emulation depends upon host kernel implementation, check +* support capbility by reading PV INFO before access HWSP. +*/ + if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) + return true; + return false; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7d6da13..2313d0a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -722,7 +722,12 @@ static void intel_lrc_irq_handler(unsigned long data) >status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int head, tail; - /* However GVT emulation depends upon intercepting CSB mmio */ + /* However GVT-g emulation depends upon host kernel +* implementation, need to check support capbility by reading PV +* INFO before access HWSP. Beside from this, another special +* configuration may also need to force use mmio, like IOMMU +* enabled. +*/ if (unlikely(execlists->csb_use_mmio)) { buf = (u32 * __force) (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 0/2] enable virtual HWSP in GVT-g
V2: clean merge conflict. Weinan Li (2): drm/i915/gvt: update CSB and CSB write pointer in virtual HWSP drm/i915: enable to read CSB and CSB write pointer from HWSP in GVT-g VM drivers/gpu/drm/i915/gvt/execlist.c| 16 + drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c| 42 -- drivers/gpu/drm/i915/gvt/vgpu.c| 8 +++ drivers/gpu/drm/i915/i915_pvinfo.h | 1 + drivers/gpu/drm/i915/i915_vgpu.c | 5 drivers/gpu/drm/i915/i915_vgpu.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 11 + drivers/gpu/drm/i915/intel_lrc.c | 7 +- 9 files changed, 85 insertions(+), 7 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 1/2] drm/i915/gvt: update CSB and CSB write pointer in virtual HWSP
The engine provides a mirror of the CSB and CSB write pointer in the HWSP. Read these status from virtual HWSP in VM can reduce CPU utilization while applications have much more short GPU workloads. Here we update the corresponding data in virtual HWSP as it in virtual MMIO. Before read these status from HWSP in GVT-g VM, please ensure the host support it by checking the BIT(3) of caps in PVINFO. Virtual HWSP only support GEN8+ platform, since the HWSP MMIO may change follow the platform update, please add the corresponding MMIO emulation when enable new platforms in GVT-g. Signed-off-by: Weinan Li <weinan.z...@intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gvt/execlist.c | 16 ++ drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c | 42 +++-- drivers/gpu/drm/i915/gvt/vgpu.c | 8 +++ drivers/gpu/drm/i915/i915_pvinfo.h | 1 + 5 files changed, 66 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 5c966ed..b0c7e9e 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -133,6 +133,8 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist, struct execlist_context_status_pointer_format ctx_status_ptr; u32 write_pointer; u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset; + unsigned long hwsp_gpa; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id, _EL_OFFSET_STATUS_PTR); @@ -158,6 +160,20 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist, ctx_status_ptr.write_ptr = write_pointer; vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; + /* +* Update the CSB and CSB write pointer in HWSP. 
+*/ + if (INTEL_INFO(dev_priv)->gen >= 8) { + hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, +vgpu->hws_pga[ring_id]); + intel_gvt_hypervisor_write_gpa(vgpu, + hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 + + write_pointer * 8, + status, 8); + intel_gvt_hypervisor_write_gpa(vgpu, + hwsp_gpa + intel_hws_csb_write_index(dev_priv) * 4, + _pointer, 4); + } gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n", vgpu->id, write_pointer, offset, status->ldw, status->udw); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index f08d194..27e8186 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -189,6 +189,7 @@ struct intel_vgpu { struct intel_vgpu_opregion opregion; struct intel_vgpu_display display; struct intel_vgpu_submission submission; + u32 hws_pga[I915_NUM_ENGINES]; struct dentry *debugfs; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 0fa61a7..c1e79dd 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1371,6 +1371,43 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, return intel_vgpu_default_mmio_write(vgpu, offset, , bytes); } +static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, + void *p_data, unsigned int bytes) +{ + u32 value = *(u32 *)p_data; + /* +* Need to emulate all the HWSP register write to ensure host can +* update the VM CSB status correctly. Here listed registers can +* support BDW, SKL or other platforms with same HWSP registers. 
+*/ + switch (offset) { + case 0x2080: + vgpu->hws_pga[RCS] = value; + break; + case 0x12080: + vgpu->hws_pga[VCS] = value; + break; + case 0x1c080: + vgpu->hws_pga[VCS2] = value; + break; + case 0x1a080: + vgpu->hws_pga[VECS] = value; + break; + case 0x22080: + vgpu->hws_pga[BCS] = value; + break; + default: + gvt_vgpu_err("VM(%d) access unknown hardware status page register:0x%x\n", +vgpu->id, offset); + return -EINVAL; + } + + gvt_dbg_mmio("VM(%d) write: 0x%x to HWSP: 0x%x\n", +vgpu->id, value, offset); + + return intel_vgpu_default_mmio_write(vgpu, offset, , bytes); +} + static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2574,8 +2611,9 @@ static int init_b
[Intel-gfx] [PATCH 1/2] drm/i915/gvt: update CSB and CSB write pointer in virtual HWSP
The engine provides a mirror of the CSB and CSB write pointer in the HWSP. Read these status from virtual HWSP in VM can reduce CPU utilization while applications have much more short GPU workloads. Here we update the corresponding data in virtual HWSP as it in virtual MMIO. Before read these status from HWSP in GVT-g VM, please ensure the host support it by checking the BIT(3) of caps in PVINFO. Virtual HWSP only support GEN8+ platform, since the HWSP MMIO may change follow the platform update, please add the corresponding MMIO emulation when enable new platforms in GVT-g. Signed-off-by: Weinan Li <weinan.z...@intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gvt/execlist.c | 14 + drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c | 41 +++-- drivers/gpu/drm/i915/gvt/vgpu.c | 8 drivers/gpu/drm/i915/i915_pvinfo.h | 1 + 5 files changed, 63 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 5c966ed..8227ea2 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -133,6 +133,8 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist, struct execlist_context_status_pointer_format ctx_status_ptr; u32 write_pointer; u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset; + unsigned long hwsp_gpa; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id, _EL_OFFSET_STATUS_PTR); @@ -158,6 +160,18 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist, ctx_status_ptr.write_ptr = write_pointer; vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; + /* +* Update the CSB and CSB write pointer in HWSP. 
+*/ + if (INTEL_INFO(dev_priv)->gen >= 8) { + hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, vgpu->hws_pga[ring_id]); + intel_gvt_hypervisor_write_gpa(vgpu, + hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 + write_pointer * 8, + status, 8); + intel_gvt_hypervisor_write_gpa(vgpu, + hwsp_gpa + intel_hws_csb_write_index(dev_priv) * 4, + _pointer, 4); + } gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n", vgpu->id, write_pointer, offset, status->ldw, status->udw); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index b7253d7..8b64f25 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -189,6 +189,7 @@ struct intel_vgpu { struct intel_vgpu_opregion opregion; struct intel_vgpu_display display; struct intel_vgpu_submission submission; + u32 hws_pga[I915_NUM_ENGINES]; #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) struct { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 87985c7..887bdf5 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1371,6 +1371,43 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, return intel_vgpu_default_mmio_write(vgpu, offset, , bytes); } +static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, + void *p_data, unsigned int bytes) +{ + u32 value = *(u32 *)p_data; + /* +* Need to emulate all the HWSP register write to ensure host can +* update the VM CSB status correctly. Here listed registers can +* support BDW, SKL or other platforms with same HWSP registers. 
+*/ + switch (offset) { + case 0x2080: + vgpu->hws_pga[RCS] = value; + break; + case 0x12080: + vgpu->hws_pga[VCS] = value; + break; + case 0x1c080: + vgpu->hws_pga[VCS2] = value; + break; + case 0x1a080: + vgpu->hws_pga[VECS] = value; + break; + case 0x22080: + vgpu->hws_pga[BCS] = value; + break; + default: + gvt_vgpu_err("VM(%d) access unkown hardware status page register:0x%x\n", +vgpu->id, offset); + return -EINVAL; + } + + gvt_dbg_mmio("VM(%d) write: 0x%x to HWSP: 0x%x\n", +vgpu->id, value, offset); + + return intel_vgpu_default_mmio_write(vgpu, offset, , bytes); +} + static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2574,8 +2611,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
[Intel-gfx] [PATCH 2/2] drm/i915: enable reading CSB and CSB write pointer from HWSP in GVT-g VM
Let the GVT-g VM read the CSB and CSB write pointer from the virtual HWSP. Not all hosts support this feature, so the guest needs to check BIT(3) of caps in PVINFO. Signed-off-by: Weinan Li <weinan.z...@intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_vgpu.c | 5 + drivers/gpu/drm/i915/i915_vgpu.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 18 +- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 5fe9f3f..6f713c5 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -86,6 +86,11 @@ bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv) return dev_priv->vgpu.caps & VGT_CAPS_FULL_48BIT_PPGTT; } +bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv) +{ + return dev_priv->vgpu.caps & VGT_CAPS_HWSP_EMULATION; +} + struct _balloon_info_ { /* * There are up to 2 regions per mappable/unmappable graphic diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index b72bd29..cec0ec1 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -29,6 +29,7 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv); bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv); +bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv); int intel_vgt_balloon(struct drm_i915_private *dev_priv); void intel_vgt_deballoon(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 955c879..cd2af7b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@ #include #include #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_mocs.h" #define RING_EXECLIST_QFULL(1 << 0x2) @@ -664,7 +665,12 @@ static void intel_lrc_irq_handler(unsigned long data) >status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int head, tail; - /* 
However GVT emulation depends upon intercepting CSB mmio */ + /* However GVT-g emulation depends upon host kernel +* implementation, need to check support capbility by reading PV +* INFO before access HWSP. Beside from this, another special +* configuration may also need to force use mmio, like IOMMU +* enabled. +*/ if (unlikely(engine->csb_use_mmio)) { buf = (u32 * __force) (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); @@ -1780,10 +1786,6 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) static bool irq_handler_force_mmio(struct drm_i915_private *i915) { - /* GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915)) - return true; - /* * IOMMU adds unpredictable latency causing the CSB write (from the * GPU into the HWSP) to only be visible some time after the interrupt @@ -1792,6 +1794,12 @@ static bool irq_handler_force_mmio(struct drm_i915_private *i915) if (intel_vtd_active()) return true; + /* GVT emulation depends upon host kernel implementation, check +* support capbility by reading PV INFO before access HWSP. +*/ + if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) + return true; + return false; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v7] drm/i915: return the correct usable aperture size under gvt environment
The I915_GEM_GET_APERTURE ioctl is used to probe the aperture size from userspace. In a GVT environment, each VM only uses the ballooned part of the aperture, so we should return the correct available aperture size, excluding the part reserved by the balloon. v2: add 'reserved' in struct i915_address_space to record the reserved size in ggtt (Chris) v3: keep aper_size as the total, adjust aper_available_size to exclude reserved and pinned. The UMD driver needs to adjust the max allocation size according to the available aperture size rather than the total size. The KMD returns the correct usable aperture size at any time (Chris, Joonas) v4: decrease reserved in deballoon (Joonas) v5: add onion teardown in balloon, add vgt_deballoon_space (Joonas) v6: change title name (Zhenyu) v7: refine code style (Joonas) Suggested-by: Chris Wilson <ch...@chris-wilson.co.uk> Suggested-by: Joonas Lahtinen <joonas.lahti...@linux.intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com> Cc: Zhenyu Wang <zhen...@linux.intel.com> Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_vgpu.c| 37 ++--- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 02adf82..fabddb4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -156,8 +156,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) mutex_unlock(>struct_mutex); args->aper_size = ggtt->base.total; - args->aper_available_size = args->aper_size - pinned; - + args->aper_available_size = args->aper_size - ggtt->base.reserved - + pinned; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fb15684..da9aa9f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -255,6 +255,7 @@ struct i915_address_space { struct 
drm_i915_file_private *file; struct list_head global_link; u64 total; /* size addr space maps (ex. 2GB for ggtt) */ + u64 reserved; /* size addr space reserved */ bool closed; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 4ab8a97..efbd37f 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -92,6 +92,16 @@ struct _balloon_info_ { static struct _balloon_info_ bl_info; +static void vgt_deballoon_space(struct i915_ggtt *ggtt, + struct drm_mm_node *node) +{ + DRM_INFO("deballoon space: range [ 0x%llx - 0x%llx ] %llu KiB.\n", +node->start, node->start + node->size, node->size / 1024); + + ggtt->base.reserved -= node->size; + drm_mm_remove_node(node); +} + /** * intel_vgt_deballoon - deballoon reserved graphics address trunks * @dev_priv: i915 device private data @@ -108,18 +118,15 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv) DRM_DEBUG("VGT deballoon.\n"); - for (i = 0; i < 4; i++) { - if (bl_info.space[i].allocated) - drm_mm_remove_node(_info.space[i]); - } - - memset(_info, 0, sizeof(bl_info)); + for (i = 0; i < 4; i++) + vgt_deballoon_space(_priv->ggtt, _info.space[i]); } static int vgt_balloon_space(struct i915_ggtt *ggtt, struct drm_mm_node *node, unsigned long start, unsigned long end) { + int ret; unsigned long size = end - start; if (start >= end) @@ -127,9 +134,12 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n", start, end, size / 1024); - return i915_gem_gtt_reserve(>base, node, + ret = i915_gem_gtt_reserve(>base, node, size, start, I915_COLOR_UNEVICTABLE, 0); + if (!ret) + ggtt->base.reserved += size; + return ret; } /** @@ -222,7 +232,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) ret = vgt_balloon_space(ggtt, _info.space[3], unmappable_end, ggtt_end); if (ret) - goto err; + goto err_upon_mappable; } /* Mappable graphic memory ballooning */ @@ -231,7 +241,7 @@ int 
intel_vgt_balloon(struct drm_i915_private *dev_priv) 0, mappable_base); if (ret) - goto err;
[Intel-gfx] [PATCH v6] drm/i915: return the correct usable aperture size under gvt environment
The I915_GEM_GET_APERTURE ioctl is used to probe the aperture size from userspace. In a GVT environment, each VM only uses the ballooned part of the aperture, so we should return the correct available aperture size, excluding the part reserved by the balloon. v2: add 'reserved' in struct i915_address_space to record the reserved size in ggtt (Chris) v3: keep aper_size as the total, adjust aper_available_size to exclude reserved and pinned. The UMD driver needs to adjust the max allocation size according to the available aperture size rather than the total size. The KMD returns the correct usable aperture size at any time (Chris, Joonas) v4: decrease reserved in deballoon (Joonas) v5: add onion teardown in balloon, add vgt_deballoon_space (Joonas) v6: change title name (Zhenyu) Suggested-by: Chris Wilson <ch...@chris-wilson.co.uk> Suggested-by: Joonas Lahtinen <joonas.lahti...@linux.intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com> Cc: Zhenyu Wang <zhen...@linux.intel.com> Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_vgpu.c| 44 ++--- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 02adf82..fd2a87d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -156,8 +156,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) mutex_unlock(>struct_mutex); args->aper_size = ggtt->base.total; - args->aper_available_size = args->aper_size - pinned; - + args->aper_available_size = args->aper_size - + ggtt->base.reserved - pinned; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fb15684..da9aa9f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -255,6 +255,7 @@ struct i915_address_space { struct drm_i915_file_private *file; struct 
list_head global_link; u64 total; /* size addr space maps (ex. 2GB for ggtt) */ + u64 reserved; /* size addr space reserved */ bool closed; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 4ab8a97..647ccad 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -92,6 +92,17 @@ struct _balloon_info_ { static struct _balloon_info_ bl_info; +static void vgt_deballoon_space(struct i915_ggtt *ggtt, +struct drm_mm_node *node) +{ + DRM_INFO("deballoon space: range [ 0x%llx - 0x%llx ] %llu KiB.\n", +node->start, node->start + node->size, node->size / 1024); + + ggtt->base.reserved -= node->size; + drm_mm_remove_node(node); + memset(node, 0, sizeof(*node)); +} + /** * intel_vgt_deballoon - deballoon reserved graphics address trunks * @dev_priv: i915 device private data @@ -108,18 +119,15 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv) DRM_DEBUG("VGT deballoon.\n"); - for (i = 0; i < 4; i++) { - if (bl_info.space[i].allocated) - drm_mm_remove_node(_info.space[i]); - } - - memset(_info, 0, sizeof(bl_info)); + for (i = 0; i < 4; i++) + vgt_deballoon_space(_priv->ggtt, _info.space[i]); } static int vgt_balloon_space(struct i915_ggtt *ggtt, struct drm_mm_node *node, unsigned long start, unsigned long end) { + int ret; unsigned long size = end - start; if (start >= end) @@ -127,9 +135,14 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n", start, end, size / 1024); - return i915_gem_gtt_reserve(>base, node, + ret = i915_gem_gtt_reserve(>base, node, size, start, I915_COLOR_UNEVICTABLE, 0); + if (!ret) + ggtt->base.reserved += size; + else + memset(node, 0, sizeof(*node)); + return ret; } /** @@ -215,14 +228,14 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) ggtt->mappable_end, unmappable_base); if (ret) - goto err; + goto err_out; } if (unmappable_end < ggtt_end) { ret = vgt_balloon_space(ggtt, _info.space[3], 
unmappable_end, ggtt_end); if (ret) - goto err; + goto err
[Intel-gfx] [PATCH v5] drm/i915/gvt: return the correct usable aperture size under gvt environment
The I915_GEM_GET_APERTURE ioctl is used to probe the aperture size from userspace. In a GVT environment, each VM only uses the ballooned part of the aperture, so we should return the correct available aperture size, excluding the part reserved by the balloon. v2: add 'reserved' in struct i915_address_space to record the reserved size in ggtt (Chris) v3: keep aper_size as the total, adjust aper_available_size to exclude reserved and pinned. The UMD driver needs to adjust the max allocation size according to the available aperture size rather than the total size. The KMD returns the correct usable aperture size at any time (Chris, Joonas) v4: decrease reserved in deballoon (Joonas) v5: add onion teardown in balloon, add vgt_deballoon_space (Joonas) Suggested-by: Chris Wilson <ch...@chris-wilson.co.uk> Suggested-by: Joonas Lahtinen <joonas.lahti...@linux.intel.com> Cc: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com> Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_vgpu.c| 44 ++--- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 02adf82..fd2a87d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -156,8 +156,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) mutex_unlock(>struct_mutex); args->aper_size = ggtt->base.total; - args->aper_available_size = args->aper_size - pinned; - + args->aper_available_size = args->aper_size - + ggtt->base.reserved - pinned; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fb15684..da9aa9f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -255,6 +255,7 @@ struct i915_address_space { struct drm_i915_file_private *file; struct list_head global_link; u64 total; /* size addr space maps (ex. 
2GB for ggtt) */ + u64 reserved; /* size addr space reserved */ bool closed; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 4ab8a97..647ccad 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -92,6 +92,17 @@ struct _balloon_info_ { static struct _balloon_info_ bl_info; +static void vgt_deballoon_space(struct i915_ggtt *ggtt, +struct drm_mm_node *node) +{ + DRM_INFO("deballoon space: range [ 0x%llx - 0x%llx ] %llu KiB.\n", +node->start, node->start + node->size, node->size / 1024); + + ggtt->base.reserved -= node->size; + drm_mm_remove_node(node); + memset(node, 0, sizeof(*node)); +} + /** * intel_vgt_deballoon - deballoon reserved graphics address trunks * @dev_priv: i915 device private data @@ -108,18 +119,15 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv) DRM_DEBUG("VGT deballoon.\n"); - for (i = 0; i < 4; i++) { - if (bl_info.space[i].allocated) - drm_mm_remove_node(_info.space[i]); - } - - memset(_info, 0, sizeof(bl_info)); + for (i = 0; i < 4; i++) + vgt_deballoon_space(_priv->ggtt, _info.space[i]); } static int vgt_balloon_space(struct i915_ggtt *ggtt, struct drm_mm_node *node, unsigned long start, unsigned long end) { + int ret; unsigned long size = end - start; if (start >= end) @@ -127,9 +135,14 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n", start, end, size / 1024); - return i915_gem_gtt_reserve(>base, node, + ret = i915_gem_gtt_reserve(>base, node, size, start, I915_COLOR_UNEVICTABLE, 0); + if (!ret) + ggtt->base.reserved += size; + else + memset(node, 0, sizeof(*node)); + return ret; } /** @@ -215,14 +228,14 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) ggtt->mappable_end, unmappable_base); if (ret) - goto err; + goto err_out; } if (unmappable_end < ggtt_end) { ret = vgt_balloon_space(ggtt, _info.space[3], unmappable_end, ggtt_end); if (ret) - goto err; + goto 
err_deballoon_upon_mappable; } /* Mappable graphic memory bal
[Intel-gfx] [PATCH v4] drm/i915/gvt: return the correct usable aperture size under gvt environment
The I915_GEM_GET_APERTURE ioctl is used to probe the aperture size from userspace. In a GVT environment, each VM only uses the ballooned part of the aperture, so we should return the correct available aperture size, excluding the part reserved by the balloon. v2: add 'reserved' in struct i915_address_space to record the reserved size in ggtt. v3: keep aper_size as the total, adjust aper_available_size to exclude reserved and pinned. The UMD driver needs to adjust the max allocation size according to the available aperture size rather than the total size. The KMD returns the correct usable aperture size at any time. v4: add onion teardown to balloon and deballoon to make sure the reserved size stays correct. Refine code style. Cc: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com> Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_vgpu.c| 8 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 33fb11c..8d8d9c0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -156,8 +156,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) mutex_unlock(>struct_mutex); args->aper_size = ggtt->base.total; - args->aper_available_size = args->aper_size - pinned; - + args->aper_available_size = args->aper_size - + ggtt->base.reserved - pinned; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fb15684..da9aa9f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -255,6 +255,7 @@ struct i915_address_space { struct drm_i915_file_private *file; struct list_head global_link; u64 total; /* size addr space maps (ex. 
2GB for ggtt) */ + u64 reserved; /* size addr space reserved */ bool closed; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 4ab8a97..25bed9b 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -109,8 +109,10 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv) DRM_DEBUG("VGT deballoon.\n"); for (i = 0; i < 4; i++) { - if (bl_info.space[i].allocated) + if (bl_info.space[i].allocated) { + dev_priv->ggtt.base.reserved -= bl_info.space[i].size; drm_mm_remove_node(_info.space[i]); + } } memset(_info, 0, sizeof(bl_info)); @@ -216,6 +218,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) if (ret) goto err; + ggtt->base.reserved += bl_info.space[2].size; } if (unmappable_end < ggtt_end) { @@ -223,6 +226,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) unmappable_end, ggtt_end); if (ret) goto err; + ggtt->base.reserved += bl_info.space[3].size; } /* Mappable graphic memory ballooning */ @@ -232,6 +236,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) if (ret) goto err; + ggtt->base.reserved += bl_info.space[0].size; } if (mappable_end < ggtt->mappable_end) { @@ -240,6 +245,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) if (ret) goto err; + ggtt->base.reserved += bl_info.space[1].size; } DRM_INFO("VGT balloon successfully\n"); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4] drm/i915/gvt: return the correct usable aperture size under gvt environment
The I915_GEM_GET_APERTURE ioctl is used to probe the aperture size from userspace. In a GVT environment, each VM only uses the ballooned part of the aperture, so we should return the correct available aperture size, excluding the part reserved by the balloon. v2: add 'reserved' in struct i915_address_space to record the reserved size in ggtt. v3: keep aper_size as the total, adjust aper_available_size to exclude reserved and pinned. The UMD driver needs to adjust the max allocation size according to the available aperture size rather than the total size. The KMD returns the correct usable aperture size at any time. v4: add onion teardown to balloon and deballoon to make sure the reserved size stays correct. Refine code style. Cc: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com> Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_vgpu.c| 8 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 33fb11c..8d8d9c0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -156,8 +156,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) mutex_unlock(>struct_mutex); args->aper_size = ggtt->base.total; - args->aper_available_size = args->aper_size - pinned; - + args->aper_available_size = args->aper_size - + ggtt->base.reserved - pinned; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fb15684..da9aa9f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -255,6 +255,7 @@ struct i915_address_space { struct drm_i915_file_private *file; struct list_head global_link; u64 total; /* size addr space maps (ex. 
2GB for ggtt) */ + u64 reserved; /* size addr space reserved */ bool closed; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 4ab8a97..b144cf6 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -109,8 +109,10 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv) DRM_DEBUG("VGT deballoon.\n"); for (i = 0; i < 4; i++) { - if (bl_info.space[i].allocated) + if (bl_info.space[i].allocated) { + dev_priv->ggtt->base.reserved -= bl_info.space[i].size; drm_mm_remove_node(_info.space[i]); + } } memset(_info, 0, sizeof(bl_info)); @@ -216,6 +218,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) if (ret) goto err; + ggtt->base.reserved += bl_info.space[2].size; } if (unmappable_end < ggtt_end) { @@ -223,6 +226,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) unmappable_end, ggtt_end); if (ret) goto err; + ggtt->base.reserved += bl_info.space[3].size; } /* Mappable graphic memory ballooning */ @@ -232,6 +236,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) if (ret) goto err; + ggtt->base.reserved += bl_info.space[0].size; } if (mappable_end < ggtt->mappable_end) { @@ -240,6 +245,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) if (ret) goto err; + ggtt->base.reserved += bl_info.space[1].size; } DRM_INFO("VGT balloon successfully\n"); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3] drm/i915/gvt: return the actual aperture size under gvt environment
The I915_GEM_GET_APERTURE ioctl is used to probe the aperture size from userspace. In a GVT environment, each VM only uses the ballooned part of the aperture, so we should return the actual available aperture size, excluding the part reserved by the balloon. v2: add 'reserved' in struct i915_address_space to record the size reserved in ggtt by the balloon. v3: keep aper_size as the total, adjust aper_available_size to exclude reserved and pinned. The UMD driver needs to adjust the max allocation size according to the available aperture size rather than the total size. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 7 +++ drivers/gpu/drm/i915/i915_gem_gtt.h | 3 ++- drivers/gpu/drm/i915/i915_vgpu.c| 5 - 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 84ea249..e84576c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -145,9 +145,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) struct i915_ggtt *ggtt = _priv->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; - size_t pinned; + size_t pinned = 0; - pinned = 0; mutex_lock(>struct_mutex); list_for_each_entry(vma, >base.active_list, vm_link) if (i915_vma_is_pinned(vma)) @@ -158,8 +157,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) mutex_unlock(>struct_mutex); args->aper_size = ggtt->base.total; - args->aper_available_size = args->aper_size - pinned; - + args->aper_available_size = args->aper_size + - ggtt->base.reserved - pinned; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fb15684..bdf832d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -255,7 +255,8 @@ struct i915_address_space { struct drm_i915_file_private *file; struct list_head global_link; u64 total; /* size addr space maps (ex. 
2GB for ggtt) */ - + /* size addr space reserved by GVT balloon, only used for ggtt */ + u64 reserved; bool closed; struct i915_page_dma scratch_page; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 4ab8a97..58055a9 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -183,7 +183,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) unsigned long mappable_base, mappable_size, mappable_end; unsigned long unmappable_base, unmappable_size, unmappable_end; - int ret; + int ret, i; if (!intel_vgpu_active(dev_priv)) return 0; @@ -242,6 +242,9 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) goto err; } + for (i = 0; i < ARRAY_SIZE(bl_info.space); i++) + ggtt->base.reserved += bl_info.space[i].size; + DRM_INFO("VGT balloon successfully\n"); return 0; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2] drm/i915/gvt: return the actual aperture size under gvt environment
The I915_GEM_GET_APERTURE ioctl is used to probe the aperture size from userspace. In a GVT environment, each VM only uses the ballooned part of the aperture, so we should return the actual aperture size, excluding the part reserved by the balloon. When the I915_GEM_CONTEXT_GETPARAM ioctl queries I915_CONTEXT_PARAM_GTT_SIZE, we also need to exclude the reserved part of the GTT. v2: add 'reserved' in struct i915_address_space to record the size reserved in ggtt by the balloon. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 6 ++ drivers/gpu/drm/i915/i915_gem_context.c | 3 ++- drivers/gpu/drm/i915/i915_gem_gtt.h | 3 ++- drivers/gpu/drm/i915/i915_vgpu.c| 5 - 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 84ea249..8736717 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -145,9 +145,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) struct i915_ggtt *ggtt = _priv->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; - size_t pinned; + size_t pinned = 0; - pinned = 0; mutex_lock(>struct_mutex); list_for_each_entry(vma, >base.active_list, vm_link) if (i915_vma_is_pinned(vma)) @@ -157,9 +156,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) pinned += vma->node.size; mutex_unlock(>struct_mutex); - args->aper_size = ggtt->base.total; + args->aper_size = ggtt->base.total - ggtt->base.reserved; args->aper_available_size = args->aper_size - pinned; - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8bd0c49..f31e8cf 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1053,7 +1053,8 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, else if (to_i915(dev)->mm.aliasing_ppgtt) args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total; else - args->value = 
to_i915(dev)->ggtt.base.total; + args->value = to_i915(dev)->ggtt.base.total - + to_i915(dev)->ggtt.base.reserved; break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: args->value = i915_gem_context_no_error_capture(ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fb15684..bdf832d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -255,7 +255,8 @@ struct i915_address_space { struct drm_i915_file_private *file; struct list_head global_link; u64 total; /* size addr space maps (ex. 2GB for ggtt) */ - + /* size addr space reserved by GVT balloon, only used for ggtt */ + u64 reserved; bool closed; struct i915_page_dma scratch_page; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 4ab8a97..58055a9 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -183,7 +183,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) unsigned long mappable_base, mappable_size, mappable_end; unsigned long unmappable_base, unmappable_size, unmappable_end; - int ret; + int ret, i; if (!intel_vgpu_active(dev_priv)) return 0; @@ -242,6 +242,9 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) goto err; } + for (i = 0; i < ARRAY_SIZE(bl_info.space); i++) + ggtt->base.reserved += bl_info.space[i].size; + DRM_INFO("VGT balloon successfully\n"); return 0; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/gvt: return the actual aperture size under gvt environment
The I915_GEM_GET_APERTURE ioctl is used to probe the aperture size from userspace. Some applications, such as OpenCL, use this information to determine how much GM resource they can use. In a GVT environment, each VM only uses the ballooned part of the aperture, so we should return the actual aperture size, excluding the part reserved by the balloon. When the I915_GEM_CONTEXT_GETPARAM ioctl queries I915_CONTEXT_PARAM_GTT_SIZE, we also need to exclude the reserved part of the GTT. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 7 +++ drivers/gpu/drm/i915/i915_gem_context.c | 4 +++- drivers/gpu/drm/i915/i915_vgpu.c| 18 +- drivers/gpu/drm/i915/i915_vgpu.h| 2 ++ 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 84ea249..b3fb424 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -145,9 +145,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) struct i915_ggtt *ggtt = _priv->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; - size_t pinned; + size_t pinned = 0; - pinned = 0; mutex_lock(>struct_mutex); list_for_each_entry(vma, >base.active_list, vm_link) if (i915_vma_is_pinned(vma)) @@ -157,9 +156,9 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) pinned += vma->node.size; mutex_unlock(>struct_mutex); - args->aper_size = ggtt->base.total; + args->aper_size = ggtt->base.total - +intel_vgt_reserved_size_by_balloon(dev_priv); args->aper_available_size = args->aper_size - pinned; - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8bd0c49..9f3280d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -88,6 +88,7 @@ #include #include #include "i915_drv.h" +#include "i915_vgpu.h" #include "i915_trace.h" #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 @@ -1053,7 +1054,8 @@ int 
i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, else if (to_i915(dev)->mm.aliasing_ppgtt) args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total; else - args->value = to_i915(dev)->ggtt.base.total; + args->value = to_i915(dev)->ggtt.base.total - + intel_vgt_reserved_size_by_balloon(dev->dev_private); break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: args->value = i915_gem_context_no_error_capture(ctx); diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 4ab8a97..ce722d8 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -88,6 +88,11 @@ struct _balloon_info_ { * graphic memory, 2/3 for unmappable graphic memory. */ struct drm_mm_node space[4]; + /* +* Total space size exclude ballooned named reserved_total, it's +* invisible for vGPU. +*/ + size_t reserved_total; }; static struct _balloon_info_ bl_info; @@ -116,6 +121,14 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv) memset(_info, 0, sizeof(bl_info)); } +size_t intel_vgt_reserved_size_by_balloon(struct drm_i915_private *dev_priv) +{ + if (!intel_vgpu_active(dev_priv)) + return 0; + + return bl_info.reserved_total; +} + static int vgt_balloon_space(struct i915_ggtt *ggtt, struct drm_mm_node *node, unsigned long start, unsigned long end) @@ -183,7 +196,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) unsigned long mappable_base, mappable_size, mappable_end; unsigned long unmappable_base, unmappable_size, unmappable_end; - int ret; + int ret, i; if (!intel_vgpu_active(dev_priv)) return 0; @@ -242,6 +255,9 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) goto err; } + for (i = 0; i < ARRAY_SIZE(bl_info.space); i++) + bl_info.reserved_total += bl_info.space[i].size; + DRM_INFO("VGT balloon successfully\n"); return 0; diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index 3c3b2d2..e776580 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ 
b/drivers/gpu/drm/i915/i915_vgpu.h @@ -29,5 +29,7 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv); int intel_vgt_balloon(struct drm_i915_private *dev_priv); void intel_vgt_deballoon(struct drm_i915_private *dev_priv); +size_t intel_vgt
[Intel-gfx] [PATCH] drm/i915/gvt: add pcode read/write emulation of BDW
Add pcode read/write emulation in gvt for BDW. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/gvt/handlers.c | 33 ++--- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index f89b183..223b366 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1304,21 +1304,24 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, u32 *data0 = _vreg(vgpu, GEN6_PCODE_DATA); switch (cmd) { - case 0x6: - /** -* "Read memory latency" command on gen9. -* Below memory latency values are read -* from skylake platform. -*/ - if (!*data0) - *data0 = 0x1e1a1100; - else - *data0 = 0x61514b3d; + case GEN9_PCODE_READ_MEM_LATENCY: + if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + /** +* "Read memory latency" command on gen9. +* Below memory latency values are read +* from skylake platform. +*/ + if (!*data0) + *data0 = 0x1e1a1100; + else + *data0 = 0x61514b3d; + } break; case SKL_PCODE_CDCLK_CONTROL: - *data0 = SKL_CDCLK_READY_FOR_CHANGE; + if (IS_SKYLAKE(vgpu->gvt->dev_priv)) + *data0 = SKL_CDCLK_READY_FOR_CHANGE; break; - case 0x5: + case GEN6_PCODE_READ_RC6VIDS: *data0 |= 0x1; break; } @@ -2202,7 +2205,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_SKL); + MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_BDW); MMIO_D(GEN6_PCODE_DATA, D_ALL); MMIO_D(0x13812c, D_ALL); MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL); @@ -2281,7 +2284,6 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x1a054, D_ALL); MMIO_D(0x44070, D_ALL); - MMIO_D(0x215c, D_HSW_PLUS); MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2453,6 +2455,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS); MMIO_D(0x1c054, D_BDW_PLUS); + MMIO_DH(GEN6_PCODE_MAILBOX, 
D_BDW_PLUS, NULL, mailbox_write); + MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS); MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS); @@ -2544,7 +2548,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(HSW_PWR_WELL_BIOS, D_SKL); MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write); - MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL, NULL, mailbox_write); MMIO_D(0xa210, D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: check status and reply value both in skl_pcode_try_request()
skl_pcode_try_request() calls sandybridge_pcode_read(). Check both the return status and the reply value simultaneously, to ensure it got the correct value without error. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ae2c0bb..e7b12ec 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7882,7 +7882,7 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, *status = sandybridge_pcode_read(dev_priv, mbox, ); - return *status || ((val & reply_mask) == reply); + return (!*status) && ((val & reply_mask) == reply); } /** -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 2/2] drm/i915: clean up unused vgpu_read/write
Having converted the force_wake_get/_put routines for a vGPU to be no-op, we can use the common mmio accessors and remove our specialised routines that simply skipped the calls to control force_wake. Signed-off-by: Weinan Li <weinan.z...@intel.com> Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk> --- drivers/gpu/drm/i915/intel_uncore.c | 58 - 1 file changed, 58 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9fad4de..e9046fa 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1052,34 +1052,6 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, #undef GEN6_READ_FOOTER #undef GEN6_READ_HEADER -#define VGPU_READ_HEADER(x) \ - unsigned long irqflags; \ - u##x val = 0; \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(_priv->uncore.lock, irqflags) - -#define VGPU_READ_FOOTER \ - spin_unlock_irqrestore(_priv->uncore.lock, irqflags); \ - trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ - return val - -#define __vgpu_read(x) \ -static u##x \ -vgpu_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - VGPU_READ_HEADER(x); \ - val = __raw_i915_read##x(dev_priv, reg); \ - VGPU_READ_FOOTER; \ -} - -__vgpu_read(8) -__vgpu_read(16) -__vgpu_read(32) -__vgpu_read(64) - -#undef __vgpu_read -#undef VGPU_READ_FOOTER -#undef VGPU_READ_HEADER - #define GEN2_WRITE_HEADER \ trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ assert_rpm_wakelock_held(dev_priv); \ @@ -1202,31 +1174,6 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define VGPU_WRITE_HEADER \ - unsigned long irqflags; \ - trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(_priv->uncore.lock, irqflags) - -#define VGPU_WRITE_FOOTER \ - spin_unlock_irqrestore(_priv->uncore.lock, irqflags) - -#define 
__vgpu_write(x) \ -static void vgpu_write##x(struct drm_i915_private *dev_priv, \ - i915_reg_t reg, u##x val, bool trace) { \ - VGPU_WRITE_HEADER; \ - __raw_i915_write##x(dev_priv, reg, val); \ - VGPU_WRITE_FOOTER; \ -} - -__vgpu_write(8) -__vgpu_write(16) -__vgpu_write(32) - -#undef __vgpu_write -#undef VGPU_WRITE_FOOTER -#undef VGPU_WRITE_HEADER - #define ASSIGN_WRITE_MMIO_VFUNCS(x) \ do { \ dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ @@ -1462,11 +1409,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) intel_shadow_table_check(); - if (intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(vgpu); - ASSIGN_READ_MMIO_VFUNCS(vgpu); - } - i915_check_and_clear_faults(dev_priv); } #undef ASSIGN_WRITE_MMIO_VFUNCS -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 1/2] drm/i915: noop forcewake get/put when vgpu activated
For a virtualized GPU, the host maintains the forcewake state on the real device. As we don't control forcewake ourselves, we can simply set force_wake_get() and force_wake_put() to be no-ops. By setting the vfuncs, we adjust both the manual control of forcewake and around the mmio accessors (making our vgpu specific mmio routines redundant and to be removed in the next patch). Signed-off-by: Weinan Li <weinan.z...@intel.com> Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk> --- drivers/gpu/drm/i915/intel_uncore.c | 13 + 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe0888..9fad4de 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -133,6 +133,13 @@ } static void +vgpu_fw_domains_nop(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + /* Guest driver doesn't need to takes care forcewake. */ +} + +static void fw_domains_posting_read(struct drm_i915_private *dev_priv) { struct intel_uncore_forcewake_domain *d; @@ -1374,6 +1381,12 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE, FORCEWAKE_ACK); } + if (intel_vgpu_active(dev_priv)) { + dev_priv->uncore.funcs.force_wake_get = + vgpu_fw_domains_nop; + dev_priv->uncore.funcs.force_wake_put = + vgpu_fw_domains_nop; + } /* All future platforms are expected to require complex power gating */ WARN_ON(dev_priv->uncore.fw_domains == 0); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 3/3] drm/i915: clean up unused vgpu_read/write
Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/intel_uncore.c | 58 - 1 file changed, 58 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9fad4de..e9046fa 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1052,34 +1052,6 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, #undef GEN6_READ_FOOTER #undef GEN6_READ_HEADER -#define VGPU_READ_HEADER(x) \ - unsigned long irqflags; \ - u##x val = 0; \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(_priv->uncore.lock, irqflags) - -#define VGPU_READ_FOOTER \ - spin_unlock_irqrestore(_priv->uncore.lock, irqflags); \ - trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ - return val - -#define __vgpu_read(x) \ -static u##x \ -vgpu_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - VGPU_READ_HEADER(x); \ - val = __raw_i915_read##x(dev_priv, reg); \ - VGPU_READ_FOOTER; \ -} - -__vgpu_read(8) -__vgpu_read(16) -__vgpu_read(32) -__vgpu_read(64) - -#undef __vgpu_read -#undef VGPU_READ_FOOTER -#undef VGPU_READ_HEADER - #define GEN2_WRITE_HEADER \ trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ assert_rpm_wakelock_held(dev_priv); \ @@ -1202,31 +1174,6 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define VGPU_WRITE_HEADER \ - unsigned long irqflags; \ - trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(_priv->uncore.lock, irqflags) - -#define VGPU_WRITE_FOOTER \ - spin_unlock_irqrestore(_priv->uncore.lock, irqflags) - -#define __vgpu_write(x) \ -static void vgpu_write##x(struct drm_i915_private *dev_priv, \ - i915_reg_t reg, u##x val, bool trace) { \ - VGPU_WRITE_HEADER; \ - __raw_i915_write##x(dev_priv, reg, val); \ - VGPU_WRITE_FOOTER; \ -} - -__vgpu_write(8) 
-__vgpu_write(16) -__vgpu_write(32) - -#undef __vgpu_write -#undef VGPU_WRITE_FOOTER -#undef VGPU_WRITE_HEADER - #define ASSIGN_WRITE_MMIO_VFUNCS(x) \ do { \ dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ @@ -1462,11 +1409,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) intel_shadow_table_check(); - if (intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(vgpu); - ASSIGN_READ_MMIO_VFUNCS(vgpu); - } - i915_check_and_clear_faults(dev_priv); } #undef ASSIGN_WRITE_MMIO_VFUNCS -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 2/3] drm/i915: noop forcewake get/put when vgpu activated
The host maintains the hardware's forcewake state; the guest doesn't need to, and also can't, control it. Although vgpu_read/write bypass forcewake_get/put in MMIO read/write, there are still separate paths called via "intel_uncore_forcewake_get/put" and "intel_uncore_forcewake_get/put__locked". Unnecessary MMIO accesses in the guest waste a lot of CPU time. Since we fully virtualize the MMIO, just noop the forcewake get/put. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/intel_uncore.c | 13 + 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe0888..9fad4de 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -133,6 +133,13 @@ } static void +vgpu_fw_domains_nop(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + /* Guest driver doesn't need to takes care forcewake. */ +} + +static void fw_domains_posting_read(struct drm_i915_private *dev_priv) { struct intel_uncore_forcewake_domain *d; @@ -1374,6 +1381,12 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE, FORCEWAKE_ACK); } + if (intel_vgpu_active(dev_priv)) { + dev_priv->uncore.funcs.force_wake_get = + vgpu_fw_domains_nop; + dev_priv->uncore.funcs.force_wake_put = + vgpu_fw_domains_nop; + } /* All future platforms are expected to require complex power gating */ WARN_ON(dev_priv->uncore.fw_domains == 0); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915: ignore posting read when using vgpu
There is no need to do a posting read when a vGPU is active. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 52d01be..8c9d81b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3830,8 +3830,11 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, } while (upper != old_upper && loop++ < 2); \ (u64)upper << 32 | lower; }) -#define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg) -#define POSTING_READ16(reg)(void)I915_READ16_NOTRACE(reg) +#define POSTING_READ(reg) ( \ + intel_vgpu_active(dev_priv) ? (void)0 : (void)I915_READ_NOTRACE(reg)) + +#define POSTING_READ16(reg) ( \ + intel_vgpu_active(dev_priv) ? (void)0 : (void)I915_READ16_NOTRACE(reg)) #define __raw_read(x, s) \ static inline uint##x##_t __raw_i915_read##x(struct drm_i915_private *dev_priv, \ @@ -3888,7 +3891,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, #define I915_READ_FW(reg__) __raw_i915_read32(dev_priv, (reg__)) #define I915_WRITE_FW(reg__, val__) __raw_i915_write32(dev_priv, (reg__), (val__)) #define I915_WRITE64_FW(reg__, val__) __raw_i915_write64(dev_priv, (reg__), (val__)) -#define POSTING_READ_FW(reg__) (void)I915_READ_FW(reg__) +#define POSTING_READ_FW(reg__) ( \ + intel_vgpu_active(dev_priv) ? (void)0 : (void)I915_READ_FW(reg__)) /* "Broadcast RGB" property */ #define INTEL_BROADCAST_RGB_AUTO 0 -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: ignore forcewake get/put when using vgpu
The host maintains the hardware's forcewake state; the guest doesn't need to, and also can't, control it. Although vgpu_read/write bypass forcewake_get/put in MMIO read/write, there are still separate paths called via "intel_uncore_forcewake_get/put" and "intel_uncore_forcewake_get/put__locked". Unnecessary MMIO accesses in the guest waste a lot of CPU time. Since we fully virtualize the MMIO, just ignore the forcewake get/put at the low level. Signed-off-by: Weinan Li <weinan.z...@intel.com> --- drivers/gpu/drm/i915/intel_uncore.c | 78 ++--- 1 file changed, 20 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe0888..08e1b5f 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -133,6 +133,20 @@ } static void +vgpu_fw_domains_get(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + /* Guest driver doesn't need to takes care forcewake. */; +} + +static void +vgpu_fw_domains_put(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + /* Guest driver doesn't need to takes care forcewake. 
*/; +} + +static void fw_domains_posting_read(struct drm_i915_private *dev_priv) { struct intel_uncore_forcewake_domain *d; @@ -1045,34 +1059,6 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, #undef GEN6_READ_FOOTER #undef GEN6_READ_HEADER -#define VGPU_READ_HEADER(x) \ - unsigned long irqflags; \ - u##x val = 0; \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(_priv->uncore.lock, irqflags) - -#define VGPU_READ_FOOTER \ - spin_unlock_irqrestore(_priv->uncore.lock, irqflags); \ - trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ - return val - -#define __vgpu_read(x) \ -static u##x \ -vgpu_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - VGPU_READ_HEADER(x); \ - val = __raw_i915_read##x(dev_priv, reg); \ - VGPU_READ_FOOTER; \ -} - -__vgpu_read(8) -__vgpu_read(16) -__vgpu_read(32) -__vgpu_read(64) - -#undef __vgpu_read -#undef VGPU_READ_FOOTER -#undef VGPU_READ_HEADER - #define GEN2_WRITE_HEADER \ trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ assert_rpm_wakelock_held(dev_priv); \ @@ -1195,31 +1181,6 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define VGPU_WRITE_HEADER \ - unsigned long irqflags; \ - trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(_priv->uncore.lock, irqflags) - -#define VGPU_WRITE_FOOTER \ - spin_unlock_irqrestore(_priv->uncore.lock, irqflags) - -#define __vgpu_write(x) \ -static void vgpu_write##x(struct drm_i915_private *dev_priv, \ - i915_reg_t reg, u##x val, bool trace) { \ - VGPU_WRITE_HEADER; \ - __raw_i915_write##x(dev_priv, reg, val); \ - VGPU_WRITE_FOOTER; \ -} - -__vgpu_write(8) -__vgpu_write(16) -__vgpu_write(32) - -#undef __vgpu_write -#undef VGPU_WRITE_FOOTER -#undef VGPU_WRITE_HEADER - #define ASSIGN_WRITE_MMIO_VFUNCS(x) \ do { \ dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ @@ 
-1375,6 +1336,12 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) FORCEWAKE, FORCEWAKE_ACK); } + if (intel_vgpu_active(dev_priv)) { + dev_priv->uncore.funcs.force_wake_get = + vgpu_fw_domains_get; + dev_priv->uncore.funcs.force_wake_put = + vgpu_fw_domains_put; + } /* All future platforms are expected to require complex power gating */ WARN_ON(dev_priv->uncore.fw_domains == 0); } @@ -1449,11 +1416,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) intel_shadow_table_check(); - if (intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(vgpu); - ASSIGN_READ_MMIO_VFUNCS(vgpu); - } - i915_check_and_clear_faults(dev_priv); } #undef ASSIGN_WRITE_MMIO_VFUNCS -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx