Re: [Intel-gfx] [PATCH 2/2] drm/i915/perf: fix ctx_id read with GuC & ICL
On 31/05/18 21:46, Michel Thierry wrote: On 5/31/2018 12:56 PM, Lionel Landwerlin wrote: One thing we didn't really understand about the OA report is that the ContextID field (dword 2) is copy of the context descriptor (dword 1). On Gen8->10 and without using GuC we didn't notice the issue because we only checked the 21bits of the ContextID field in the OA reports which matches exactly the hw_id stored into the context descriptor. When using GuC submission we have an issue of a non matching hw_id because GuC uses bit 20 of the hw_id to signal proxy submission. This change makes introduces a mask to compare only the relevant bits. Choose one: makes or introduces ;) On ICL the context descriptor format has changed and we failed to address this. On top of using a mask we also need to shift the bits properly. Someone is going to complain this should be two patches (one to address the GuC-ness and one for Gen11), but not me. Reviewed-by: Michel Thierry Kind of agree, but that's a pain. We can put it as a fix of the first commit that enabled gen8 and I'll do the backport in stable versions. 
Cheers, - Lionel Signed-off-by: Lionel Landwerlin Fixes: 1de401c08fa805 ("drm/i915/perf: enable perf support on ICL") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104252 BSpec: 1237 Testcase: igt/perf/gen8-unprivileged-single-ctx-counters --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 123 --- drivers/gpu/drm/i915/intel_lrc.c | 5 ++ 3 files changed, 101 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 58ab9259fb73..0ccda488a8db 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1953,6 +1953,7 @@ struct drm_i915_private { struct intel_context *pinned_ctx; u32 specific_ctx_id; + u32 specific_ctx_id_mask; struct hrtimer poll_check_timer; wait_queue_head_t poll_wq; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 4a62024cbf85..d5e5d4635f1f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -738,12 +738,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, continue; } - /* - * XXX: Just keep the lower 21 bits for now since I'm not - * entirely sure if the HW touches any of the higher bits in - * this field - */ - ctx_id = report32[2] & 0x1fffff; + ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask; /* * Squash whatever is in the CTX_ID field if it's marked as @@ -1204,6 +1199,36 @@ static int i915_oa_read(struct i915_perf_stream *stream, return dev_priv->perf.oa.ops.read(stream, buf, count, offset); } +static int oa_get_render_lrca(struct drm_i915_private *i915, + struct i915_gem_context *ctx, + u32 *lrca) +{ + struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_context *ce; + int ret; + + ret = i915_mutex_lock_interruptible(&i915->drm); + if (ret) + return ret; + + /* + * As the ID is the gtt offset of the context's vma we + * pin the vma to ensure the ID remains fixed. + * + * NB: implied RCS engine... 
+ */ + ce = intel_context_pin(ctx, engine); + mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + i915->perf.oa.pinned_ctx = ce; + + *lrca = i915_ggtt_offset(ce->state); + + return 0; +} + /** * oa_get_render_ctx_id - determine and hold ctx hw id * @stream: An i915-perf stream opened for OA metrics @@ -1216,40 +1241,81 @@ static int i915_oa_read(struct i915_perf_stream *stream, */ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct drm_i915_private *i915 = stream->dev_priv; - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { - dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; - } else { - struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct intel_context *ce; + switch (INTEL_GEN(i915)) { + case 7: { int ret; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); + ret = oa_get_render_lrca(i915, stream->ctx, + &i915->perf.oa.specific_ctx_id); if (ret) return ret; /* - * As the ID is the gtt offset of the context's vma we - * pin the vma to ensure the ID remains fixed. - * - * NB: implied RCS engine... + * On Haswell we don't do any post processing of the reports + * and don't need to use the mask. */ - ce = intel_context_pin(stream->ctx, engine); - mutex_unlock(&dev_priv->drm.struct_mutex); - if
Re: [Intel-gfx] [PATCH 2/2] drm/i915/perf: fix ctx_id read with GuC & ICL
On 5/31/2018 12:56 PM, Lionel Landwerlin wrote: One thing we didn't really understand about the OA report is that the ContextID field (dword 2) is copy of the context descriptor (dword 1). On Gen8->10 and without using GuC we didn't notice the issue because we only checked the 21bits of the ContextID field in the OA reports which matches exactly the hw_id stored into the context descriptor. When using GuC submission we have an issue of a non matching hw_id because GuC uses bit 20 of the hw_id to signal proxy submission. This change makes introduces a mask to compare only the relevant bits. Choose one: makes or introduces ;) On ICL the context descriptor format has changed and we failed to address this. On top of using a mask we also need to shift the bits properly. Someone is going to complain this should be two patches (one to address the GuC-ness and one for Gen11), but not me. Reviewed-by: Michel Thierry Signed-off-by: Lionel Landwerlin Fixes: 1de401c08fa805 ("drm/i915/perf: enable perf support on ICL") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104252 BSpec: 1237 Testcase: igt/perf/gen8-unprivileged-single-ctx-counters --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 123 --- drivers/gpu/drm/i915/intel_lrc.c | 5 ++ 3 files changed, 101 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 58ab9259fb73..0ccda488a8db 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1953,6 +1953,7 @@ struct drm_i915_private { struct intel_context *pinned_ctx; u32 specific_ctx_id; + u32 specific_ctx_id_mask; struct hrtimer poll_check_timer; wait_queue_head_t poll_wq; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 4a62024cbf85..d5e5d4635f1f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -738,12 +738,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream, continue; } - /* -* XXX: Just keep the lower 21 bits for now since I'm not -* entirely sure if the HW touches any of the higher bits in -* this field -*/ - ctx_id = report32[2] & 0x1fffff; + ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask; /* * Squash whatever is in the CTX_ID field if it's marked as @@ -1204,6 +1199,36 @@ static int i915_oa_read(struct i915_perf_stream *stream, return dev_priv->perf.oa.ops.read(stream, buf, count, offset); } +static int oa_get_render_lrca(struct drm_i915_private *i915, + struct i915_gem_context *ctx, + u32 *lrca) +{ + struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_context *ce; + int ret; + + ret = i915_mutex_lock_interruptible(&i915->drm); + if (ret) + return ret; + + /* +* As the ID is the gtt offset of the context's vma we +* pin the vma to ensure the ID remains fixed. +* +* NB: implied RCS engine... +*/ + ce = intel_context_pin(ctx, engine); + mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + i915->perf.oa.pinned_ctx = ce; + + *lrca = i915_ggtt_offset(ce->state); + + return 0; +} + /** * oa_get_render_ctx_id - determine and hold ctx hw id * @stream: An i915-perf stream opened for OA metrics @@ -1216,40 +1241,81 @@ static int i915_oa_read(struct i915_perf_stream *stream, */ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct drm_i915_private *i915 = stream->dev_priv; - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { - dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; - } else { - struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct intel_context *ce; + switch (INTEL_GEN(i915)) { + case 7: { int ret; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); + ret = oa_get_render_lrca(i915, stream->ctx, +&i915->perf.oa.specific_ctx_id); if (ret) return ret; /* -* As the ID is the gtt offset of the context's vma we -* pin the vma to ensure the ID remains fixed. -* -* NB: implied RCS engine... 
+* On Haswell we don't do any post processing of the reports +* and don't need to use the mask. */ - ce = intel_context_pin(stream->ctx,
[Intel-gfx] [PATCH 2/2] drm/i915/perf: fix ctx_id read with GuC & ICL
One thing we didn't really understand about the OA report is that the ContextID field (dword 2) is a copy of the context descriptor (dword 1). On Gen8->10 and without using GuC we didn't notice the issue because we only checked the 21 bits of the ContextID field in the OA reports, which match exactly the hw_id stored in the context descriptor. When using GuC submission we have an issue of a non-matching hw_id because GuC uses bit 20 of the hw_id to signal proxy submission. This change introduces a mask to compare only the relevant bits. On ICL the context descriptor format has changed and we failed to address this. On top of using a mask we also need to shift the bits properly. Signed-off-by: Lionel Landwerlin Fixes: 1de401c08fa805 ("drm/i915/perf: enable perf support on ICL") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104252 BSpec: 1237 Testcase: igt/perf/gen8-unprivileged-single-ctx-counters --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 123 --- drivers/gpu/drm/i915/intel_lrc.c | 5 ++ 3 files changed, 101 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 58ab9259fb73..0ccda488a8db 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1953,6 +1953,7 @@ struct drm_i915_private { struct intel_context *pinned_ctx; u32 specific_ctx_id; + u32 specific_ctx_id_mask; struct hrtimer poll_check_timer; wait_queue_head_t poll_wq; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 4a62024cbf85..d5e5d4635f1f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -738,12 +738,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, continue; } - /* -* XXX: Just keep the lower 21 bits for now since I'm not -* entirely sure if the HW touches any of the higher bits in -* this field -*/ - ctx_id = report32[2] & 0x1fffff; + ctx_id = report32[2] & 
dev_priv->perf.oa.specific_ctx_id_mask; /* * Squash whatever is in the CTX_ID field if it's marked as @@ -1204,6 +1199,36 @@ static int i915_oa_read(struct i915_perf_stream *stream, return dev_priv->perf.oa.ops.read(stream, buf, count, offset); } +static int oa_get_render_lrca(struct drm_i915_private *i915, + struct i915_gem_context *ctx, + u32 *lrca) +{ + struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_context *ce; + int ret; + + ret = i915_mutex_lock_interruptible(&i915->drm); + if (ret) + return ret; + + /* +* As the ID is the gtt offset of the context's vma we +* pin the vma to ensure the ID remains fixed. +* +* NB: implied RCS engine... +*/ + ce = intel_context_pin(ctx, engine); + mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + i915->perf.oa.pinned_ctx = ce; + + *lrca = i915_ggtt_offset(ce->state); + + return 0; +} + /** * oa_get_render_ctx_id - determine and hold ctx hw id * @stream: An i915-perf stream opened for OA metrics @@ -1216,40 +1241,81 @@ static int i915_oa_read(struct i915_perf_stream *stream, */ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct drm_i915_private *i915 = stream->dev_priv; - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { - dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; - } else { - struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct intel_context *ce; + switch (INTEL_GEN(i915)) { + case 7: { int ret; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); + ret = oa_get_render_lrca(i915, stream->ctx, +&i915->perf.oa.specific_ctx_id); if (ret) return ret; /* -* As the ID is the gtt offset of the context's vma we -* pin the vma to ensure the ID remains fixed. -* -* NB: implied RCS engine... +* On Haswell we don't do any post processing of the reports +* and don't need to use the mask. 
*/ - ce = intel_context_pin(stream->ctx, engine); - mutex_unlock(&dev_priv->drm.struct_mutex); - if (IS_ERR(ce)) - return PTR_ERR(ce); +