[Intel-gfx] [CI 11/13] drm/i915: add a new perf configuration execbuf parameter

2019-09-09 Thread Lionel Landwerlin
We want the ability to dispatch a set of command buffers to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add an emit_oa_config() vfunc in the intel_engine_cs (Chris)
Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
Serialize requests with OA config updates

v5: Check that the chained extension is only present once (Chris)
Unpin oa_vma in main path (Chris)

v6: Use BIT_ULL (Chris)

v7: Hold drm.struct_mutex when serializing the request with OA config (Chris)

v8: Remove active request from engine (Lionel)

v9: Move fetching OA configuration past engine pinning (Lionel)
Lock VMA before moving to active (Chris)

v10: Fix leak on perf_fd (Lionel)

Signed-off-by: Lionel Landwerlin 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 147 +-
 drivers/gpu/drm/i915/i915_getparam.c  |   4 +
 include/uapi/drm/i915_drm.h   |  39 +
 3 files changed, 188 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 46ad8d9642d1..d416b60c94bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -24,6 +24,7 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
+#include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
@@ -284,7 +285,12 @@ struct i915_execbuffer {
struct {
u64 flags; /** Available extensions parameters */
struct drm_i915_gem_execbuffer_ext_timeline_fences 
timeline_fences;
+   struct drm_i915_gem_execbuffer_ext_perf perf_config;
} extensions;
+
+   struct file *perf_file;
+   struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is 
not needed. */
+   struct i915_vma *oa_vma;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1152,6 +1158,58 @@ static int reloc_move_to_gpu(struct i915_request *rq, 
struct i915_vma *vma)
return err;
 }
 
+/**
+ * eb_get_oa_config - look up the OA configuration requested for this execbuf
+ * @eb: execbuffer state; reads eb->extensions, eb->i915 and eb->engine, and
+ *      writes eb->perf_file, eb->oa_config and eb->oa_vma.
+ *
+ * When the DRM_I915_GEM_EXECBUFFER_EXT_PERF extension bit is set, the perf fd
+ * supplied in the extension is resolved to a file, checked against the
+ * device's current exclusive perf stream and against the engine of this
+ * execbuf, and the requested OA config together with a GGTT vma for its
+ * buffer object is stored in @eb.
+ *
+ * NOTE(review): the file reference taken with fget() is not dropped on any of
+ * the error returns below; presumably the caller releases eb->perf_file on
+ * its own cleanup path - confirm against the caller.
+ *
+ * Return: 0 on success or when the extension is not in use; -EINVAL if the fd
+ * cannot be resolved, does not match the exclusive stream, or the stream's
+ * engine differs from eb->engine; otherwise the error reported by
+ * i915_mutex_lock_interruptible(), i915_perf_get_oa_config_and_bo() or
+ * i915_vma_instance().
+ */
+static int
+eb_get_oa_config(struct i915_execbuffer *eb)
+{
+   struct drm_i915_gem_object *oa_bo;
+   int err = 0;
+   /* Clear all three OA-related fields before doing anything else. */
+   eb->perf_file = NULL;
+   eb->oa_config = NULL;
+   eb->oa_vma = NULL;
+   /* Nothing more to do unless the PERF extension bit is set. */
+   if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) 
== 0)
+   return 0;
+   /* Resolve the user-supplied perf fd into a struct file. */
+   eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
+   if (!eb->perf_file)
+   return -EINVAL;
+   /* The comparison with exclusive_stream is made with the lock held. */
+   err = i915_mutex_lock_interruptible(&eb->i915->drm);
+   if (err)
+   return err;
+
+   if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
+   err = -EINVAL;
+
+   mutex_unlock(&eb->i915->drm.struct_mutex);
+
+   if (err)
+   return err;
+   /* NOTE(review): exclusive_stream is read again after the unlock above - confirm it cannot change here. */
+   if (eb->i915->perf.exclusive_stream->engine != eb->engine)
+   return -EINVAL;
+   /* Fetch the config named by the extension, plus its buffer object. */
+   err = i915_perf_get_oa_config_and_bo(
+   eb->i915->perf.exclusive_stream,
+   eb->extensions.perf_config.oa_config,
+   &eb->oa_config, &oa_bo);
+   if (err)
+   return err;
+   /* oa_bo is put right after the vma lookup, whether or not it succeeded. */
+   eb->oa_vma = i915_vma_instance(oa_bo,
+  &eb->engine->gt->ggtt->vm, NULL);
+   i915_gem_object_put(oa_bo);
+   if (IS_ERR(eb->oa_vma)) {
+   err = PTR_ERR(eb->oa_vma);
+   eb->oa_vma = NULL;
+   return err;
+   }
+
+   return 0;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 struct i915_vma *vma,
 unsigned int len)
@@ -2051,6 +2109,54 @@ add_to_client(struct i915_request *rq, struct drm_file 
*file)
spin_unlock(&file_priv->mm.lock);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+   struct i915_perf_stream *perf_stream;
+   int err;
+
+   if (!eb->oa_config)
+   return 0;
+
+   perf_stream = eb->perf_file->private_data;
+
+   err = mutex_lock_interruptible(&perf_stream->config_mutex);
+   if (err)
+   return err;
+
+   err = i915_active_request_set(&perf_stream->active_config_rq,
+ eb->request);
+   if (err)
+   goto out;
+
+   /*
+* If the config hasn't changed, skip reconfiguring the HW (this is
+* subject to a delay we want to avoid has much as possible).
+*/
+   if (eb->oa_config == perf_stream->oa_config)
+   goto out;
+
+   i

[Intel-gfx] [CI 11/13] drm/i915: add a new perf configuration execbuf parameter

2019-09-09 Thread Lionel Landwerlin
We want the ability to dispatch a set of command buffers to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add an emit_oa_config() vfunc in the intel_engine_cs (Chris)
Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
Serialize requests with OA config updates

v5: Check that the chained extension is only present once (Chris)
Unpin oa_vma in main path (Chris)

v6: Use BIT_ULL (Chris)

v7: Hold drm.struct_mutex when serializing the request with OA config (Chris)

v8: Remove active request from engine (Lionel)

v9: Move fetching OA configuration past engine pinning (Lionel)
Lock VMA before moving to active (Chris)

v10: Fix leak on perf_fd (Lionel)

Signed-off-by: Lionel Landwerlin 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 147 +-
 drivers/gpu/drm/i915/i915_getparam.c  |   4 +
 include/uapi/drm/i915_drm.h   |  39 +
 3 files changed, 188 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 46ad8d9642d1..d416b60c94bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -24,6 +24,7 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
+#include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
@@ -284,7 +285,12 @@ struct i915_execbuffer {
struct {
u64 flags; /** Available extensions parameters */
struct drm_i915_gem_execbuffer_ext_timeline_fences 
timeline_fences;
+   struct drm_i915_gem_execbuffer_ext_perf perf_config;
} extensions;
+
+   struct file *perf_file;
+   struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is 
not needed. */
+   struct i915_vma *oa_vma;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1152,6 +1158,58 @@ static int reloc_move_to_gpu(struct i915_request *rq, 
struct i915_vma *vma)
return err;
 }
 
+/**
+ * eb_get_oa_config - look up the OA configuration requested for this execbuf
+ * @eb: execbuffer state; reads eb->extensions, eb->i915 and eb->engine, and
+ *      writes eb->perf_file, eb->oa_config and eb->oa_vma.
+ *
+ * When the DRM_I915_GEM_EXECBUFFER_EXT_PERF extension bit is set, the perf fd
+ * supplied in the extension is resolved to a file, checked against the
+ * device's current exclusive perf stream and against the engine of this
+ * execbuf, and the requested OA config together with a GGTT vma for its
+ * buffer object is stored in @eb.
+ *
+ * NOTE(review): the file reference taken with fget() is not dropped on any of
+ * the error returns below; presumably the caller releases eb->perf_file on
+ * its own cleanup path - confirm against the caller.
+ *
+ * Return: 0 on success or when the extension is not in use; -EINVAL if the fd
+ * cannot be resolved, does not match the exclusive stream, or the stream's
+ * engine differs from eb->engine; otherwise the error reported by
+ * i915_mutex_lock_interruptible(), i915_perf_get_oa_config_and_bo() or
+ * i915_vma_instance().
+ */
+static int
+eb_get_oa_config(struct i915_execbuffer *eb)
+{
+   struct drm_i915_gem_object *oa_bo;
+   int err = 0;
+   /* Clear all three OA-related fields before doing anything else. */
+   eb->perf_file = NULL;
+   eb->oa_config = NULL;
+   eb->oa_vma = NULL;
+   /* Nothing more to do unless the PERF extension bit is set. */
+   if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) 
== 0)
+   return 0;
+   /* Resolve the user-supplied perf fd into a struct file. */
+   eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
+   if (!eb->perf_file)
+   return -EINVAL;
+   /* The comparison with exclusive_stream is made with the lock held. */
+   err = i915_mutex_lock_interruptible(&eb->i915->drm);
+   if (err)
+   return err;
+
+   if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
+   err = -EINVAL;
+
+   mutex_unlock(&eb->i915->drm.struct_mutex);
+
+   if (err)
+   return err;
+   /* NOTE(review): exclusive_stream is read again after the unlock above - confirm it cannot change here. */
+   if (eb->i915->perf.exclusive_stream->engine != eb->engine)
+   return -EINVAL;
+   /* Fetch the config named by the extension, plus its buffer object. */
+   err = i915_perf_get_oa_config_and_bo(
+   eb->i915->perf.exclusive_stream,
+   eb->extensions.perf_config.oa_config,
+   &eb->oa_config, &oa_bo);
+   if (err)
+   return err;
+   /* oa_bo is put right after the vma lookup, whether or not it succeeded. */
+   eb->oa_vma = i915_vma_instance(oa_bo,
+  &eb->engine->gt->ggtt->vm, NULL);
+   i915_gem_object_put(oa_bo);
+   if (IS_ERR(eb->oa_vma)) {
+   err = PTR_ERR(eb->oa_vma);
+   eb->oa_vma = NULL;
+   return err;
+   }
+
+   return 0;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 struct i915_vma *vma,
 unsigned int len)
@@ -2051,6 +2109,54 @@ add_to_client(struct i915_request *rq, struct drm_file 
*file)
spin_unlock(&file_priv->mm.lock);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+   struct i915_perf_stream *perf_stream;
+   int err;
+
+   if (!eb->oa_config)
+   return 0;
+
+   perf_stream = eb->perf_file->private_data;
+
+   err = mutex_lock_interruptible(&perf_stream->config_mutex);
+   if (err)
+   return err;
+
+   err = i915_active_request_set(&perf_stream->active_config_rq,
+ eb->request);
+   if (err)
+   goto out;
+
+   /*
+* If the config hasn't changed, skip reconfiguring the HW (this is
+* subject to a delay we want to avoid has much as possible).
+*/
+   if (eb->oa_config == perf_stream->oa_config)
+   goto out;
+
+   i