Hi guys,
I thought I'd attach this, as it is now gone 2AM and I doubt I'm going
to finish it "tonight". I was hoping to elicit some initial review to
suggest whether the design was sane or not.
I'd originally imagined tying the profiling lifetime to the execution /
completion of individual batch-buffers, but for now I'd like to get it
partly working like this, and perhaps develop some user-space program to
view the results and see if they make sense.
The basic (kernel side) functionality is there, albeit limited by some
hard-coded timing parameters and buffer sizes. I might look at whether
it makes sense to do some in-kernel over-sampling and percentage
generation, or just spit raw register dumps out to the debugfs interface
for userspace to do that. (Buffer size might play a part in that
decision).
The locking is a little rough, and I probably need to double-buffer the
sample buffers so I can always be sure the debugfs interface gets a
complete "frame" / "buffer" / whatever.. trace's worth of data. Perhaps
I should put a semaphore around the debug data output which sleeps until
the profiling has been stopped. (E.g. at the end of a frame).
Currently the debugfs routine just takes a spinlock and copies out
whatever samples have been gathered, meaning the only reliable way to
see a full frame's worth of data is for it to be the last frame before
the instrumented client quit. Part of me did wonder about doing a
constant stream of data to userspace.. but then I quickly realised I had
no idea how to do that, and what to do if userspace lets us overrun our
buffers ;)
I've got a libdrm patch to expose the new IOCTL (also attached), but I
don't have a very good solution for hooking that into mesa and
synchronising with frames. I applied a VERY dirty kludge for testing.
Does anyone know if you can pass userdata parameters to a hrtimer? From
the API, it looked not - although in that case, how do you avoid needing
horrid global state variables?
Regards,
--
Peter Clifton
Electrical Engineering Division,
Engineering Department,
University of Cambridge,
9, JJ Thomson Avenue,
Cambridge
CB3 0FA
Tel: +44 (0)7729 980173 - (No signal in the lab!)
Tel: +44 (0)1223 748328 - (Shared lab phone, ask for me)
>From 3a5b5950624e88bcbd44073847d27e11c8199218 Mon Sep 17 00:00:00 2001
From: Peter Clifton <[email protected]>
Date: Sun, 31 Oct 2010 01:27:58 +0000
Subject: [PATCH] Hacky little instdone and instdone1 profiler
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_debugfs.c | 1 +
drivers/gpu/drm/i915/i915_dma.c | 7 +
drivers/gpu/drm/i915/i915_drv.h | 12 ++
drivers/gpu/drm/i915/i915_gem.c | 4 +
drivers/gpu/drm/i915/i915_trace_idle.c | 309 ++++++++++++++++++++++++++++++++
include/drm/i915_drm.h | 8 +
7 files changed, 342 insertions(+), 0 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_trace_idle.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index fdc833d..45aacf8 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -10,6 +10,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
i915_gem_debug.o \
i915_gem_evict.o \
i915_gem_tiling.o \
+ i915_trace_idle.o \
i915_trace_points.o \
intel_display.o \
intel_crt.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7698983..82d331a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1043,6 +1043,7 @@ static struct drm_info_list i915_debugfs_list[] = {
{"i915_sr_status", i915_sr_status, 0},
{"i915_opregion", i915_opregion, 0},
{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
+ {"i915_trace_idle", i915_trace_idle_debugfs_info, 0},
};
#define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 785ee11..a41da27 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2057,6 +2057,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
dev_priv->mchdev_lock = &mchdev_lock;
spin_unlock(&mchdev_lock);
+ /* XXX: Not sure if this belongs here or not */
+ i915_trace_idle_init (dev);
+
return 0;
out_workqueue_free:
@@ -2077,6 +2080,9 @@ int i915_driver_unload(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
int ret;
+ /* XXX: Not sure if this belongs here or not */
+ i915_trace_idle_finish (dev);
+
spin_lock(&mchdev_lock);
i915_mch_dev = NULL;
spin_unlock(&mchdev_lock);
@@ -2263,6 +2269,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+ DRM_IOCTL_DEF_DRV(I915_TRACE_IDLE, i915_trace_idle_ioctl, DRM_AUTH|DRM_UNLOCKED),
};
int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2c2c19b..274af4b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -707,6 +707,9 @@ typedef struct drm_i915_private {
/* list of fbdev register on this device */
struct intel_fbdev *fbdev;
+
+ /* Idle tracing data */
+ struct trace_idle_data *trace_idle_data;
} drm_i915_private_t;
/** driver private structure attached to each drm_gem_object */
@@ -1015,6 +1018,8 @@ int i915_gem_get_tiling(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
+int i915_trace_idle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
void i915_gem_load(struct drm_device *dev);
int i915_gem_init_object(struct drm_gem_object *obj);
struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
@@ -1114,6 +1119,13 @@ extern int i915_restore_state(struct drm_device *dev);
extern int i915_save_state(struct drm_device *dev);
extern int i915_restore_state(struct drm_device *dev);
+/* i915_trace_idle.c */
+int i915_trace_idle_init(struct drm_device *dev);
+void i915_trace_idle_finish(struct drm_device *dev);
+int i915_trace_idle_start(struct drm_device *dev);
+int i915_trace_idle_stop(struct drm_device *dev);
+int i915_trace_idle_debugfs_info(struct seq_file *m, void *data);
+
/* intel_i2c.c */
extern int intel_setup_gmbus(struct drm_device *dev);
extern void intel_teardown_gmbus(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6c2618d..392b575 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3651,6 +3651,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
return -EINVAL;
}
+ if (args->flags & I915_EXEC_TRACE_IDLE) {
+ DRM_INFO("Batchbuffer with I915_EXEC_TRACE_IDLE\n");
+ }
+
if (args->buffer_count < 1) {
DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_trace_idle.c b/drivers/gpu/drm/i915/i915_trace_idle.c
new file mode 100644
index 0000000..cbb640c
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_trace_idle.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright © 2010 Peter Clifton
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Peter Clifton <[email protected]>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/seq_file.h>
+
+#include <linux/input.h>
+#include <linux/slab.h>
+#include "drmP.h"
+#include "intel_drv.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+
+#define SAMPLE_INTERVAL_US 10
+/*
+ * Microsecond / millisecond to nanosecond conversion.
+ * Integer constants only: the previous 1E3L / 1E6L literals were long double,
+ * which drags floating-point arithmetic into kernel code. The argument is
+ * parenthesised so expressions such as US_TO_NS(a + b) expand correctly.
+ */
+#define US_TO_NS(x) ((x) * NSEC_PER_USEC)
+#define MS_TO_NS(x) ((x) * NSEC_PER_MSEC)
+
+/* Number of sample buffer entries */
+#define SAMPLE_BUFFER_LENGTH 8192
+
+/*
+ * XXX: hack. The hrtimer callback is handed only the struct hrtimer pointer,
+ * so i915_trace_idle_init() stashes the device here for the callback to use.
+ * NOTE(review): container_of() on the timer embedded in struct
+ * trace_idle_data, plus a device back-pointer in that structure, would
+ * presumably make this global unnecessary -- confirm.
+ */
+static struct drm_device *global_dev = NULL;
+
+/* One reading of the instruction-done register(s), taken by the sampling timer. */
+struct idle_sample {
+ /* INSTDONE on gen < 4, INSTDONE_I965 otherwise */
+ u32 instdone;
+ /* INSTDONE1 on gen >= 4; stored as 0 on gen < 4 */
+ u32 instdone1;
+};
+
+/* Book-keeping for idle tracing; reached through drm_i915_private.trace_idle_data. */
+struct trace_idle_data {
+ /* set by i915_trace_idle_start(), cleared by i915_trace_idle_stop() */
+ bool tracing;
+ /* set once the timer has reported a full buffer; cleared on start */
+ bool warned_overflow;
+ /* capacity of samples[], in entries (set to SAMPLE_BUFFER_LENGTH at init) */
+ int max_samples;
+ /* number of valid entries currently stored in samples[] */
+ int num_samples;
+ /* sample storage, allocated in i915_trace_idle_init() */
+ struct idle_sample *samples;
+ /* periodic sampling timer, (re)initialised on every start */
+ struct hrtimer timer;
+ /* taken with irqsave by the timer callback, start/stop and the debugfs reader */
+ spinlock_t samples_lock;
+};
+
+
+/**
+ * i915_trace_idle_timer_callback - hrtimer handler that records one sample
+ * @timer: the sampling timer embedded in struct trace_idle_data
+ *
+ * Reads INSTDONE (gen < 4) or INSTDONE_I965 and INSTDONE1 (gen >= 4) and
+ * appends the values to the sample buffer while holding samples_lock.
+ *
+ * Returns HRTIMER_RESTART after pushing the expiry SAMPLE_INTERVAL_US
+ * forward, or HRTIMER_NORESTART once the sample buffer is full.
+ */
+static enum hrtimer_restart
+i915_trace_idle_timer_callback(struct hrtimer *timer)
+{
+	/*
+	 * XXX: SHOULD BE PASSED TO THE TIMER SOMEHOW?
+	 * NOTE(review): container_of(timer, struct trace_idle_data, timer)
+	 * recovers idle_data; a device back-pointer would still be needed.
+	 */
+	struct drm_device *dev = global_dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data = dev_priv->trace_idle_data;
+	struct idle_sample *sample;
+	u32 instdone;
+	u32 instdone1;
+	unsigned long irqflags;
+
+	/* Register reads happen before the lock is taken to keep the hold time short */
+	if (INTEL_INFO(dev)->gen < 4) {
+		instdone = I915_READ(INSTDONE);
+		instdone1 = 0;
+	} else {
+		instdone = I915_READ(INSTDONE_I965);
+		instdone1 = I915_READ(INSTDONE1);
+	}
+
+	/* Obtain a lock to ensure we don't collide with data readout */
+	spin_lock_irqsave(&idle_data->samples_lock, irqflags);
+
+	if (idle_data->num_samples >= idle_data->max_samples) {
+		if (!idle_data->warned_overflow)
+			printk(KERN_ERR "Overflow in trace idle buffer\n");
+		idle_data->warned_overflow = true;
+		/*
+		 * Release the lock before bailing out; returning with it held
+		 * would deadlock the next start/stop/debugfs caller.
+		 */
+		spin_unlock_irqrestore(&idle_data->samples_lock, irqflags);
+		return HRTIMER_NORESTART;
+	}
+
+	sample = &idle_data->samples[idle_data->num_samples++];
+	sample->instdone = instdone;
+	sample->instdone1 = instdone1;
+
+	/* Release the lock */
+	spin_unlock_irqrestore(&idle_data->samples_lock, irqflags);
+
+	hrtimer_forward_now(timer, ns_to_ktime(US_TO_NS(SAMPLE_INTERVAL_US)));
+	return HRTIMER_RESTART;
+}
+
+/**
+ * i915_trace_idle_init - allocate idle-tracing state for a device
+ * @dev: DRM device being loaded
+ *
+ * Allocates the book-keeping structure and the sample buffer and publishes
+ * them through dev_priv->trace_idle_data. Tracing itself is not started.
+ *
+ * Returns 0 on success, or -ENOMEM if either allocation fails; in that case
+ * dev_priv->trace_idle_data is not assigned.
+ */
+int
+i915_trace_idle_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data;
+
+	/* Allocate our book-keeping structure */
+	idle_data = kzalloc(sizeof(*idle_data), GFP_KERNEL);
+	if (!idle_data) {
+		printk(KERN_ERR "Failed to allocate idle tracing sample buffer\n");
+		return -ENOMEM;
+	}
+
+	idle_data->max_samples = SAMPLE_BUFFER_LENGTH;
+	/* Run-time init instead of the deprecated SPIN_LOCK_UNLOCKED assignment */
+	spin_lock_init(&idle_data->samples_lock);
+
+	/* Allocate memory for the recorded samples (kcalloc checks the multiply) */
+	idle_data->samples = kcalloc(idle_data->max_samples,
+				     sizeof(*idle_data->samples), GFP_KERNEL);
+	if (!idle_data->samples) {
+		printk(KERN_ERR "Failed to allocate idle tracing sample buffer\n");
+		kfree(idle_data);
+		return -ENOMEM;
+	}
+
+	dev_priv->trace_idle_data = idle_data;
+
+	/* XXX: global used by the timer callback to find the device */
+	global_dev = dev;
+
+	printk(KERN_INFO "Initialised support for tracing GPU idle data\n");
+	return 0;
+}
+
+
+/**
+ * i915_trace_idle_finish - tear down idle-tracing state
+ * @dev: DRM device being unloaded
+ *
+ * Stops an active trace, then frees the sample buffer and the book-keeping
+ * structure and clears dev_priv->trace_idle_data. Does nothing when no
+ * state was installed.
+ */
+void
+i915_trace_idle_finish(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data = dev_priv->trace_idle_data;
+
+	/*
+	 * i915_trace_idle_init() can fail with -ENOMEM and the load path does
+	 * not check its result, so there may be nothing to clean up.
+	 */
+	if (!idle_data)
+		return;
+
+	if (idle_data->tracing)
+		i915_trace_idle_stop(dev);
+
+	kfree(idle_data->samples);
+	kfree(idle_data);
+	dev_priv->trace_idle_data = NULL;
+
+	printk(KERN_INFO "Cleaned up support for tracing GPU idle data\n");
+}
+
+/**
+ * i915_trace_idle_start - begin sampling the instruction-done registers
+ * @dev: DRM device to trace
+ *
+ * Resets the sample buffer, marks tracing as active and arms the sampling
+ * hrtimer to fire SAMPLE_INTERVAL_US from now.
+ *
+ * Returns 0 on success, -EINVAL when the tracing state was never set up,
+ * or -EBUSY when a trace is already running.
+ */
+int
+i915_trace_idle_start(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *state = dev_priv->trace_idle_data;
+	unsigned long flags;
+	bool already_tracing;
+
+	if (state == NULL) {
+		printk(KERN_ERR "called with no initialization\n");
+		return -EINVAL;
+	}
+
+	/* The lock keeps us from colliding with data readout */
+	spin_lock_irqsave(&state->samples_lock, flags);
+
+	/* XXX: A race between two clients doing idle tracing? */
+	already_tracing = state->tracing;
+	if (!already_tracing) {
+		/* Discard whatever a previous trace recorded */
+		state->warned_overflow = false;
+		state->tracing = true;
+		state->num_samples = 0;
+	}
+
+	spin_unlock_irqrestore(&state->samples_lock, flags);
+
+	if (already_tracing) {
+		printk(KERN_INFO "Already tracing GPU idle performance\n");
+		return -EBUSY;
+	}
+
+	/* XXX: Wouldn't it be nice if we could pass data to the timer callback? */
+	hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	state->timer.function = &i915_trace_idle_timer_callback;
+
+	hrtimer_start(&state->timer,
+		      ktime_set(0, US_TO_NS(SAMPLE_INTERVAL_US)),
+		      HRTIMER_MODE_REL);
+	return 0;
+}
+
+/**
+ * i915_trace_idle_stop - stop sampling the instruction-done registers
+ * @dev: DRM device being traced
+ *
+ * Clears the tracing flag under samples_lock and cancels the sampling
+ * hrtimer. Recorded samples are left in place for the debugfs reader.
+ *
+ * Returns 0 on success, or -EINVAL when the tracing state was never set up
+ * or no trace is currently running.
+ */
+int
+i915_trace_idle_stop(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data = dev_priv->trace_idle_data;
+	unsigned long irqflags;
+	bool was_tracing;
+
+	if (!idle_data) {
+		printk(KERN_ERR "called with no initialization\n");
+		return -EINVAL;
+	}
+
+	/* Obtain a lock to ensure we don't collide with data readout */
+	spin_lock_irqsave(&idle_data->samples_lock, irqflags);
+	was_tracing = idle_data->tracing;
+	idle_data->tracing = false;
+	/*
+	 * Release the lock on every path: the early return used to leave it
+	 * held with interrupts disabled.
+	 */
+	spin_unlock_irqrestore(&idle_data->samples_lock, irqflags);
+
+	if (!was_tracing) {
+		/* XXX: A race between two clients doing idle tracing? */
+		printk(KERN_INFO "Not currently tracing GPU idle performance\n");
+		return -EINVAL;
+	}
+
+	/* Cancel outside the lock: the callback itself takes samples_lock */
+	hrtimer_cancel(&idle_data->timer);
+	return 0;
+}
+
+/**
+ * i915_trace_idle_debugfs_info - dump the recorded samples to debugfs
+ * @m: seq_file to print into; m->private is the drm_info_node
+ * @data: unused
+ *
+ * Snapshots the sample buffer under samples_lock into a temporary copy and
+ * prints one line per sample after the lock has been dropped.
+ *
+ * Returns 0 on success (including when tracing was never set up), or
+ * -ENOMEM if the temporary copy cannot be allocated.
+ */
+int
+i915_trace_idle_debugfs_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data = dev_priv->trace_idle_data;
+	unsigned long irqflags;
+	struct idle_sample *samples;
+	int num_samples;
+	bool warned_overflow;
+	int i;
+
+	if (!idle_data) {
+		seq_printf(m, "Idle tracing not inisialized\n");
+		return 0;
+	}
+
+	/*
+	 * Allocate space to copy the data to. A GFP_KERNEL allocation cannot
+	 * be made while holding the spinlock, so it is done up front, and the
+	 * seq_printf() calls are kept outside the locked region.
+	 */
+	samples = kmalloc(SAMPLE_BUFFER_LENGTH * sizeof(*samples), GFP_KERNEL);
+	if (!samples) {
+		printk(KERN_ERR "Failed to allocate temporary sample buffer for output\n");
+		return -ENOMEM;
+	}
+
+	/* Obtain a lock to ensure we don't collide with the sampling timer */
+	spin_lock_irqsave(&idle_data->samples_lock, irqflags);
+
+	/* Copy the samples */
+	num_samples = idle_data->num_samples;
+	warned_overflow = idle_data->warned_overflow;
+
+	if (num_samples > 0)
+		memcpy(samples, idle_data->samples,
+		       num_samples * sizeof(*samples));
+
+	/* Release the lock */
+	spin_unlock_irqrestore(&idle_data->samples_lock, irqflags);
+
+	seq_printf(m, "The sample buffer has %d samples out of a max possible of %d\n",
+		   num_samples, SAMPLE_BUFFER_LENGTH);
+	if (warned_overflow)
+		seq_printf(m, "The sample buffer overflowed, so later samples were lost\n");
+
+	seq_printf(m, "SAMPLE: INSTDONE INSTDONE1\n");
+	for (i = 0; i < num_samples; i++) {
+		seq_printf(m, "%06d: 0x%08x 0x%08x\n",
+			   i, samples[i].instdone, samples[i].instdone1);
+	}
+	seq_printf(m, "END\n");
+
+	kfree(samples);
+	return 0;
+}
+
+
+/**
+ * i915_trace_idle_ioctl - IOCTL controlling idle tracing
+ * @dev: DRM device
+ * @data: struct drm_i915_trace_idle supplied by userspace
+ * @file_priv: calling DRM file (unused)
+ *
+ * A non-zero args->start_trace starts tracing, zero stops it.
+ *
+ * Returns the result of i915_trace_idle_start() / i915_trace_idle_stop()
+ * so userspace can tell when the request was rejected, instead of always
+ * reporting success.
+ */
+int
+i915_trace_idle_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct drm_i915_trace_idle *args = data;
+
+	if (args->start_trace)
+		return i915_trace_idle_start(dev);
+
+	return i915_trace_idle_stop(dev);
+}
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 8c641be..32eb48f 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_OVERLAY_PUT_IMAGE 0x27
#define DRM_I915_OVERLAY_ATTRS 0x28
#define DRM_I915_GEM_EXECBUFFER2 0x29
+#define DRM_I915_TRACE_IDLE 0x30
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -239,6 +240,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
#define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_TRACE_IDLE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_TRACE_IDLE, struct drm_i915_trace_idle)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
@@ -633,11 +635,17 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_RENDER (1<<0)
#define I915_EXEC_BSD (2<<0)
#define I915_EXEC_BLT (3<<0)
+
+#define I915_EXEC_TRACE_IDLE (1<<3)
__u64 flags;
__u64 rsvd1;
__u64 rsvd2;
};
+/* Argument block for DRM_IOCTL_I915_TRACE_IDLE. */
+struct drm_i915_trace_idle {
+ /* Non-zero starts idle tracing, zero stops it. */
+ __u32 start_trace;
+};
+
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
--
1.7.1
>From 9178e7600a31b5dcf52ae216125da9d4ef8703f2 Mon Sep 17 00:00:00 2001
From: Peter Clifton <[email protected]>
Date: Sun, 31 Oct 2010 02:17:25 +0000
Subject: [PATCH] Expose the IOCTL for tracing the GPU's idle status
---
include/drm/i915_drm.h | 8 ++++++++
intel/intel_bufmgr.c | 5 +++++
intel/intel_bufmgr.h | 1 +
intel/intel_bufmgr_gem.c | 21 +++++++++++++++++++++
intel/intel_bufmgr_priv.h | 2 ++
5 files changed, 37 insertions(+), 0 deletions(-)
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 19da2c0..874f721 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -189,6 +189,7 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_OVERLAY_PUT_IMAGE 0x27
#define DRM_I915_OVERLAY_ATTRS 0x28
#define DRM_I915_GEM_EXECBUFFER2 0x29
+#define DRM_I915_TRACE_IDLE 0x30
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -230,6 +231,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
#define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_TRACE_IDLE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_TRACE_IDLE, struct drm_i915_trace_idle)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
@@ -625,11 +627,17 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_RENDER (1<<0)
#define I915_EXEC_BSD (2<<0)
#define I915_EXEC_BLT (3<<0)
+
+#define I915_EXEC_TRACE_IDLE (1<<3)
__u64 flags;
__u64 rsvd1;
__u64 rsvd2;
};
+/* Argument block for DRM_IOCTL_I915_TRACE_IDLE. */
+struct drm_i915_trace_idle {
+ /* Non-zero starts idle tracing, zero stops it. */
+ __u32 start_trace;
+};
+
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 2b4e888..184365c 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -274,3 +274,8 @@ int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
return bufmgr->get_pipe_from_crtc_id(bufmgr, crtc_id);
return -1;
}
+
+/**
+ * Forwards a GPU idle-tracing request to the buffer manager's trace_idle
+ * hook, passing start_trace through unchanged.
+ *
+ * Returns the hook's result, or -1 when the buffer manager does not
+ * provide a trace_idle hook (the hook is optional, so it must be checked
+ * before being called).
+ */
+int drm_intel_trace_idle(drm_intel_bufmgr *bufmgr, int start_trace)
+{
+	if (bufmgr->trace_idle)
+		return bufmgr->trace_idle(bufmgr, start_trace);
+	return -1;
+}
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 9df5168..c390686 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -135,6 +135,7 @@ int drm_intel_bo_madvise(drm_intel_bo *bo, int madv);
int drm_intel_bo_disable_reuse(drm_intel_bo *bo);
int drm_intel_bo_is_reusable(drm_intel_bo *bo);
int drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo);
+int drm_intel_trace_idle(drm_intel_bufmgr *bufmgr, int start_trace);
/* drm_intel_bufmgr_gem.c */
drm_intel_bufmgr *drm_intel_bufmgr_gem_init(int fd, int batch_size);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 37a3691..ba840cd 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -2022,6 +2022,26 @@ drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
return 0;
}
+/**
+ * GEM implementation of the trace_idle hook: issues
+ * DRM_IOCTL_I915_TRACE_IDLE with the given start_trace value.
+ *
+ * Returns 0 on success or the negated errno of the failed ioctl.
+ */
+static int
+drm_intel_gem_trace_idle(drm_intel_bufmgr *bufmgr, int start_trace)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
+	struct drm_i915_trace_idle trace_idle;
+	int ret;
+
+	memset(&trace_idle, 0, sizeof(trace_idle));
+	trace_idle.start_trace = start_trace;
+	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_TRACE_IDLE, &trace_idle);
+
+	if (ret) {
+		/* fprintf() may overwrite errno, so capture it before reporting */
+		int err = errno;
+
+		fprintf(stderr, "DRM_IOCTL_I915_TRACE_IDLE failed: %s\n",
+			strerror(err));
+		return -err;
+	}
+
+	return 0;
+}
+
static void
add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
{
@@ -2207,6 +2227,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
drm_intel_gem_get_pipe_from_crtc_id;
bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
+ bufmgr_gem->bufmgr.trace_idle = drm_intel_gem_trace_idle;
init_cache_buckets(bufmgr_gem);
diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
index 87e91e7..7a2594f 100644
--- a/intel/intel_bufmgr_priv.h
+++ b/intel/intel_bufmgr_priv.h
@@ -276,6 +276,8 @@ struct _drm_intel_bufmgr {
/** Returns true if target_bo is in the relocation tree rooted at bo. */
int (*bo_references) (drm_intel_bo *bo, drm_intel_bo *target_bo);
+ int (*trace_idle) (drm_intel_bufmgr *bufmgr, int start_trace);
+
/**< Enables verbose debugging printouts */
int debug;
};
--
1.7.1
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 9b39823..005794b 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -125,6 +125,8 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
if (intel->first_post_swapbuffers_batch == NULL) {
intel->first_post_swapbuffers_batch = intel->batch->buf;
drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
+ /* XXX: HACK PUTTING THIS HERE */
+ drm_intel_trace_idle (intel->bufmgr, 1);
}
if (used == 0)
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 7ace50b..577f7f8 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -566,6 +566,8 @@ intel_glFlush(struct gl_context *ctx)
intel_flush(ctx);
intel_flush_front(ctx);
intel->need_throttle = GL_TRUE;
+ /* XXX: HACK PUTTING THIS HERE */
+ drm_intel_trace_idle (intel->bufmgr, 0);
}
void
_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx