We want the ability to dispatch a set of command buffers to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to specify which OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

Signed-off-by: Lionel Landwerlin <[email protected]>
---
 drivers/gpu/drm/i915/i915_drv.c            |  4 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 60 +++++++++++++++++++---
 drivers/gpu/drm/i915/i915_request.c        |  4 ++
 drivers/gpu/drm/i915/i915_request.h        |  2 +
 drivers/gpu/drm/i915/intel_lrc.c           | 13 ++++-
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 11 +++-
 include/uapi/drm/i915_drm.h                | 12 ++++-
 7 files changed, 97 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 193023427b40..564c2e749fd8 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -444,6 +444,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, 
void *data,
        case I915_PARAM_MMAP_GTT_COHERENT:
                value = INTEL_INFO(dev_priv)->has_coherent_ggtt;
                break;
+       case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+               /* Obviously requires perf support. */
+               value = dev_priv->perf.initialized;
+               break;
        default:
                DRM_DEBUG("Unknown parameter %d\n", param->param);
                return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 09187286d346..8b963641f142 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -286,6 +286,8 @@ struct i915_execbuffer {
         */
        int lut_size;
        struct hlist_head *buckets; /** ht for relocation handles */
+
+       struct i915_vma *oa_config; /** HW configuration for OA, NULL is not 
needed. */
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1121,6 +1123,32 @@ static void clflush_write32(u32 *addr, u32 value, 
unsigned int flushes)
                *addr = value;
 }
 
+/* Resolve an execbuf's OA config; perf_fd must be the exclusive OA stream. */
+static int
+get_execbuf_oa_config(struct drm_i915_private *dev_priv,
+                     int perf_fd, u32 oa_config_id,
+                     struct i915_vma **out_oa_vma)
+{
+       struct file *perf_file;
+       bool owns_stream;
+
+       if (!dev_priv->perf.oa.exclusive_stream)
+               return -EINVAL;
+
+       perf_file = fget(perf_fd);
+       if (!perf_file)
+               return -EINVAL;
+
+       /* Only the pointer is compared, so drop the fget() reference first. */
+       owns_stream = perf_file->private_data ==
+                     dev_priv->perf.oa.exclusive_stream;
+       fput(perf_file);
+       if (!owns_stream)
+               return -EINVAL;
+       return i915_perf_get_oa_config(dev_priv, oa_config_id,
+                                      NULL, out_oa_vma);
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
                             struct i915_vma *vma,
                             unsigned int len)
@@ -1173,6 +1201,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
                goto err_unpin;
        }
 
+       rq->oa_config = eb->oa_config;
+       eb->oa_config = NULL;
+
        err = i915_request_await_object(rq, vma->obj, true);
        if (err)
                goto err_request;
@@ -1875,12 +1906,15 @@ static bool i915_gem_check_execbuffer(struct 
drm_i915_gem_execbuffer2 *exec)
                        return false;
        }
 
-       if (exec->DR4 == 0xffffffff) {
-               DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
-               exec->DR4 = 0;
+       /* We reuse DR1 & DR4 fields for passing the perf config detail. */
+       if (!(exec->flags & I915_EXEC_PERF_CONFIG)) {
+               if (exec->DR4 == 0xffffffff) {
+                       DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+                       exec->DR4 = 0;
+               }
+               if (exec->DR1 || exec->DR4)
+                       return false;
        }
-       if (exec->DR1 || exec->DR4)
-               return false;
 
        if ((exec->batch_start_offset | exec->batch_len) & 0x7)
                return false;
@@ -2224,6 +2258,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        eb.buffer_count = args->buffer_count;
        eb.batch_start_offset = args->batch_start_offset;
        eb.batch_len = args->batch_len;
+       eb.oa_config = NULL;
 
        eb.batch_flags = 0;
        if (args->flags & I915_EXEC_SECURE) {
@@ -2253,9 +2288,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                }
        }
 
+       if (args->flags & I915_EXEC_PERF_CONFIG) {
+               err = get_execbuf_oa_config(eb.i915, args->DR1, args->DR4,
+                                           &eb.oa_config);
+               if (err)
+                       goto err_out_fence;
+       }
+
        err = eb_create(&eb);
        if (err)
-               goto err_out_fence;
+               goto err_perf;
 
        GEM_BUG_ON(!eb.lut_size);
 
@@ -2365,6 +2407,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                goto err_batch_unpin;
        }
 
+       eb.request->oa_config = eb.oa_config;
+       eb.oa_config = NULL;
+
        if (in_fence) {
                err = i915_request_await_dma_fence(eb.request, in_fence);
                if (err < 0)
@@ -2426,6 +2471,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        i915_gem_context_put(eb.ctx);
 err_destroy:
        eb_destroy(&eb);
+err_perf:
+       if (eb.oa_config)
+               i915_vma_put(eb.oa_config);
 err_out_fence:
        if (out_fence_fd != -1)
                put_unused_fd(out_fence_fd);
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index abd4dacbab8e..8fb134793925 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -379,6 +379,9 @@ static void i915_request_retire(struct i915_request 
*request)
 
        unreserve_gt(request->i915);
 
+       if (request->oa_config)
+               i915_vma_put(request->oa_config);
+
        i915_sched_node_fini(request->i915, &request->sched);
        i915_request_put(request);
 }
@@ -704,6 +707,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct 
i915_gem_context *ctx)
        rq->batch = NULL;
        rq->capture_list = NULL;
        rq->waitboost = false;
+       rq->oa_config = NULL;
 
        /*
         * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_request.h 
b/drivers/gpu/drm/i915/i915_request.h
index 90e9d170a0cd..7a42c9b94877 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -188,6 +188,8 @@ struct i915_request {
        struct drm_i915_file_private *file_priv;
        /** file_priv list entry for this request */
        struct list_head client_link;
+
+       struct i915_vma *oa_config; /** HW configuration for OA, NULL is not 
needed. */
 };
 
 #define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b240332838c1..d3d8c0c60d65 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1858,6 +1858,8 @@ static int gen8_emit_bb_start(struct i915_request *rq,
 {
        u32 *cs;
        int ret;
+       bool use_oa_config =
+               rq->i915->perf.oa.exclusive_stream && rq->oa_config;
 
        /* Don't rely in hw updating PDPs, specially in lite-restore.
         * Ideally, we should set Force PD Restore in ctx descriptor,
@@ -1875,10 +1877,19 @@ static int gen8_emit_bb_start(struct i915_request *rq,
                rq->gem_context->ppgtt->pd_dirty_rings &= 
~intel_engine_flag(rq->engine);
        }
 
-       cs = intel_ring_begin(rq, 6);
+       cs = intel_ring_begin(rq, use_oa_config ? 10 : 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);
 
+       if (use_oa_config) {
+               u32 oa_config_offset = i915_ggtt_offset(rq->oa_config);
+
+               *cs++ = MI_BATCH_BUFFER_START_GEN8;
+               *cs++ = oa_config_offset;
+               *cs++ = 0;
+               *cs++ = MI_NOOP;
+       }
+
        /*
         * WaDisableCtxRestoreArbitration:bdw,chv
         *
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b8a7a014d46d..d8ebcf91ce93 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2037,11 +2037,20 @@ hsw_emit_bb_start(struct i915_request *rq,
                  unsigned int dispatch_flags)
 {
        u32 *cs;
+       bool use_oa_config =
+               rq->i915->perf.oa.exclusive_stream && rq->oa_config;
 
-       cs = intel_ring_begin(rq, 2);
+       cs = intel_ring_begin(rq, use_oa_config ? 4 : 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);
 
+       if (use_oa_config) {
+               u32 oa_config_offset = i915_ggtt_offset(rq->oa_config);
+
+               *cs++ = MI_BATCH_BUFFER_START;
+               *cs++ = oa_config_offset;
+       }
+
        *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
                0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW);
        /* bit0-7 is the length on GEN6+ */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 62f669030741..4f0b39796d80 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -559,6 +559,8 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT   52
 
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 53
+
 typedef struct drm_i915_getparam {
        __s32 param;
        /*
@@ -1078,7 +1080,15 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/* Request that perf monitoring hardware be reprogrammed before executing the
+ * commands from the batch in the execbuf. The DR1 & DR4 fields of the execbuf
+ * must contain, respectively, the file descriptor of the perf monitoring
+ * device and the ID of the OA configuration to program.
+ */
+#define I915_EXEC_PERF_CONFIG   (1<<20)
+
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_PERF_CONFIG<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK      (0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to