[PATCH v3 06/11] drm/i915: Enable i915 perf stream for Haswell OA unit

2016-08-15 Thread Chris Wilson
On Mon, Aug 15, 2016 at 03:41:23PM +0100, Robert Bragg wrote:
>  int __must_check i915_gem_evict_something(struct drm_device *dev,
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
> b/drivers/gpu/drm/i915/i915_gem_context.c
> index bd13d08..b4de357 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -134,6 +134,24 @@ static int get_context_size(struct drm_i915_private 
> *dev_priv)
>   return ret;
>  }
>  
> +int i915_gem_context_pin_legacy_rcs_state(struct drm_i915_private *dev_priv,
> +   struct i915_gem_context *ctx)
> +{
> + int ret;
> +
> + lockdep_assert_held(>i915->drm.struct_mutex);
> +
> + ret = i915_gem_obj_ggtt_pin(ctx->engine[RCS].state,
> + ctx->ggtt_alignment,
> + 0);
> + if (ret)
> + return ret;
> +
> + i915_oa_legacy_context_pin_notify(dev_priv, ctx);
> +
> + return 0;
> +}

I am still not all at all happy with this. I hope the recent changes to
do the vma tracking make it clear.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH v3 06/11] drm/i915: Enable i915 perf stream for Haswell OA unit

2016-08-15 Thread Robert Bragg
Gen graphics hardware can be set up to periodically write snapshots of
performance counters into a circular buffer via its Observation
Architecture and this patch exposes that capability to userspace via the
i915 perf interface.

Cc: Chris Wilson 
Signed-off-by: Robert Bragg 
Signed-off-by: Zhenyu Wang 
---
 drivers/gpu/drm/i915/i915_drv.h |  68 ++-
 drivers/gpu/drm/i915/i915_gem_context.c |  22 +-
 drivers/gpu/drm/i915/i915_perf.c| 977 +++-
 drivers/gpu/drm/i915/i915_reg.h | 338 +++
 drivers/gpu/drm/i915/intel_ringbuffer.c |   7 +-
 include/uapi/drm/i915_drm.h |  70 ++-
 6 files changed, 1449 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 95222f0..48595c9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1724,6 +1724,11 @@ struct intel_wm_config {
bool sprites_scaled;
 };

+struct i915_oa_format {
+   u32 format;
+   int size;
+};
+
 struct i915_oa_reg {
i915_reg_t addr;
u32 value;
@@ -1750,11 +1755,6 @@ struct i915_perf_stream_ops {
 */
void (*disable)(struct i915_perf_stream *stream);

-   /* Return: true if any i915 perf records are ready to read()
-* for this stream.
-*/
-   bool (*can_read)(struct i915_perf_stream *stream);
-
/* Call poll_wait, passing a wait queue that will be woken
 * once there is something ready to read() for the stream
 */
@@ -1764,9 +1764,7 @@ struct i915_perf_stream_ops {

/* For handling a blocking read, wait until there is something
 * to ready to read() for the stream. E.g. wait on the same
-* wait queue that would be passed to poll_wait() until
-* ->can_read() returns true (if its safe to call ->can_read()
-* without the i915 perf lock held).
+* wait queue that would be passed to poll_wait().
 */
int (*wait_unlocked)(struct i915_perf_stream *stream);

@@ -1801,11 +1799,26 @@ struct i915_perf_stream {
struct list_head link;

u32 sample_flags;
+   int sample_size;

struct i915_gem_context *ctx;
bool enabled;

-   struct i915_perf_stream_ops *ops;
+   const struct i915_perf_stream_ops *ops;
+};
+
+struct i915_oa_ops {
+   void (*init_oa_buffer)(struct drm_i915_private *dev_priv);
+   int (*enable_metric_set)(struct drm_i915_private *dev_priv);
+   void (*disable_metric_set)(struct drm_i915_private *dev_priv);
+   void (*oa_enable)(struct drm_i915_private *dev_priv);
+   void (*oa_disable)(struct drm_i915_private *dev_priv);
+   void (*update_oacontrol)(struct drm_i915_private *dev_priv);
+   void (*update_hw_ctx_id_locked)(struct drm_i915_private *dev_priv,
+   u32 ctx_id);
+   int (*read)(struct i915_perf_stream *stream,
+   struct i915_perf_read_state *read_state);
+   bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
 };

 struct drm_i915_private {
@@ -2100,16 +2113,47 @@ struct drm_i915_private {

struct {
bool initialized;
+
struct mutex lock;
struct list_head streams;

+   spinlock_t hook_lock;
+
struct {
-   u32 metrics_set;
+   struct i915_perf_stream *exclusive_stream;
+
+   u32 specific_ctx_id;
+
+   struct hrtimer poll_check_timer;
+   wait_queue_head_t poll_wq;
+   atomic_t pollin;
+
+   bool periodic;
+   int period_exponent;
+   int timestamp_frequency;
+
+   int tail_margin;
+
+   int metrics_set;

const struct i915_oa_reg *mux_regs;
int mux_regs_len;
const struct i915_oa_reg *b_counter_regs;
int b_counter_regs_len;
+
+   struct {
+   struct drm_i915_gem_object *obj;
+   u32 gtt_offset;
+   u8 *addr;
+   int format;
+   int format_size;
+   } oa_buffer;
+
+   u32 gen7_latched_oastatus1;
+
+   struct i915_oa_ops ops;
+   const struct i915_oa_format *oa_formats;
+   int n_builtin_sets;
} oa;
} perf;

@@ -3476,6 +3520,8 @@ struct drm_i915_gem_object *
 i915_gem_alloc_context_obj(struct drm_device *dev, size_t size);
 struct i915_gem_context *
 i915_gem_context_create_gvt(struct drm_device *dev);
+int i915_gem_context_pin_legacy_rcs_state(struct drm_i915_private *dev_priv,
+