This adds AUB file dump support to generate an execution
trace for the internal GPU simulator.

Signed-off-by: Zhenyu Wang <[email protected]>
---
 intel/Makefile.am        |    3 +-
 intel/intel_bufmgr.h     |   38 +++++
 intel/intel_bufmgr_gem.c |  402 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 442 insertions(+), 1 deletions(-)

diff --git a/intel/Makefile.am b/intel/Makefile.am
index 1ae92f8..398cd2f 100644
--- a/intel/Makefile.am
+++ b/intel/Makefile.am
@@ -41,7 +41,8 @@ libdrm_intel_la_SOURCES = \
        intel_bufmgr_gem.c \
        intel_chipset.h \
        mm.c \
-       mm.h
+       mm.h \
+       intel_aub.h
 
 libdrm_intelincludedir = ${includedir}/libdrm
 libdrm_intelinclude_HEADERS = intel_bufmgr.h
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index daa18b4..bb4158a 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -35,6 +35,7 @@
 #define INTEL_BUFMGR_H
 
 #include <stdint.h>
+#include <stdio.h>
 
 struct drm_clip_rect;
 
@@ -83,6 +84,39 @@ struct _drm_intel_bo {
        int handle;
 };
 
+enum drm_intel_aub_bmp_format {
+       AUB_DUMP_BMP_LEGACY,
+       AUB_DUMP_BMP_8BIT,
+       AUB_DUMP_BMP_ARGB_0555,
+       AUB_DUMP_BMP_ARGB_0565,
+       AUB_DUMP_BMP_ARGB_4444,
+       AUB_DUMP_BMP_ARGB_1555,
+       AUB_DUMP_BMP_ARGB_0888,
+       AUB_DUMP_BMP_ARGB_8888,
+       AUB_DUMP_BMP_YCRCB_SWAPY,
+       AUB_DUMP_BMP_YCRCB_NORMAL,
+       AUB_DUMP_BMP_YCRCB_SWAPUV,
+       AUB_DUMP_BMP_YCRCB_SWAPUVY,
+       AUB_DUMP_BMP_ABGR_8888,
+};
+
+/*
+ * surface info needed by aub DUMP_BMP block
+ */
+struct drm_intel_aub_surface_bmp {
+       uint16_t x_offset;
+       uint16_t y_offset;
+       uint16_t pitch;
+       uint8_t bits_per_pixel;
+       uint8_t format;
+       uint16_t width;
+       uint16_t height;
+       uint32_t tiling_walk_y:1;
+       uint32_t tiling:1;
+       uint32_t pad:30;
+};
+
+
 #define BO_ALLOC_FOR_RENDER (1<<0)
 
 drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
@@ -150,6 +184,10 @@ int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
 void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
 
 int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id);
+void drm_intel_bufmgr_gem_set_aubfile(drm_intel_bufmgr *bufmgr, FILE *file);
+void drm_intel_bufmgr_gem_stop_aubfile(drm_intel_bufmgr *bufmgr);
+int drm_intel_gem_aub_dump_bmp(drm_intel_bufmgr *bufmgr, drm_intel_bo *bo,
+                              unsigned int offset, struct drm_intel_aub_surface_bmp *bmp);
 
 /* drm_intel_bufmgr_fake.c */
 drm_intel_bufmgr *drm_intel_bufmgr_fake_init(int fd,
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 3cdffce..654bc31 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -57,6 +57,7 @@
 #include "intel_bufmgr.h"
 #include "intel_bufmgr_priv.h"
 #include "intel_chipset.h"
+#include "intel_aub.h"
 #include "string.h"
 
 #include "i915_drm.h"
@@ -75,6 +76,13 @@ struct drm_intel_gem_bo_bucket {
        unsigned long size;
 };
 
+struct drm_intel_aub_bmp {
+       drm_intel_bo *bo; /* surface bo */
+       unsigned int offset;
+       struct drm_intel_aub_surface_bmp bmp;
+       struct drm_intel_aub_bmp *next;
+};
+
 typedef struct _drm_intel_bufmgr_gem {
        drm_intel_bufmgr bufmgr;
 
@@ -106,6 +114,10 @@ typedef struct _drm_intel_bufmgr_gem {
        unsigned int has_relaxed_fencing : 1;
        unsigned int bo_reuse : 1;
        char fenced_relocs;
+
+       FILE *aub_file;
+       uint32_t aub_offset;
+       struct drm_intel_aub_bmp *aub_bmp;
 } drm_intel_bufmgr_gem;
 
 #define DRM_INTEL_RELOC_FENCE (1<<0)
@@ -195,8 +207,396 @@ struct _drm_intel_bo_gem {
         * relocations.
         */
        int reloc_tree_fences;
+
+       uint32_t aub_offset;
 };
 
+/* AUB trace dump support */
+
+static void
+aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
+{
+       fwrite(&data, 1, 4, bufmgr_gem->aub_file);
+}
+
+static void
+aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
+{
+       fwrite(data, 1, size, bufmgr_gem->aub_file);
+}
+
+static void
+aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+       uint32_t *data;
+       unsigned int i;
+
+       data = malloc(bo->size);
+       drm_intel_bo_get_subdata(bo, offset, size, data);
+
+       /* Easy mode: write out bo with no relocations */
+       if (!bo_gem->reloc_count) {
+               aub_out_data(bufmgr_gem, data, size);
+               free(data);
+               return;
+       }
+
+       /* Otherwise, handle the relocations while writing. */
+       for (i = 0; i < size / 4; i++) {
+               int r;
+               for (r = 0; r < bo_gem->reloc_count; r++) {
+                       struct drm_i915_gem_relocation_entry *reloc;
+                       drm_intel_reloc_target *info;
+
+                       reloc = &bo_gem->relocs[r];
+                       info = &bo_gem->reloc_target_info[r];
+
+                       if (reloc->offset == offset + i * 4) {
+                               drm_intel_bo_gem *target_gem;
+                               uint32_t val;
+
+                               target_gem = (drm_intel_bo_gem *)info->bo;
+
+                               val = reloc->delta;
+                               val += target_gem->aub_offset;
+
+                               aub_out(bufmgr_gem, val);
+                               data[i] = val;
+                               break;
+                       }
+               }
+               if (r == bo_gem->reloc_count) {
+                       /* no relocation, just the data */
+                       aub_out(bufmgr_gem, data[i]);
+               }
+       }
+}
+
+static void
+aub_bo_get_address(drm_intel_bo *bo)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+
+       /* Give the object a graphics address in the AUB file.  We
+        * don't just use the GEM object address because we do AUB
+        * dumping before execution -- we want to successfully log
+        * when the hardware might hang, and we might even want to aub
+        * capture for a driver trying to execute on a different
+        * generation of hardware by disabling the actual kernel exec
+        * call.
+        */
+       bo_gem->aub_offset = bufmgr_gem->aub_offset;
+       bufmgr_gem->aub_offset += bo->size;
+       /* XXX: Handle aperture overflow. */
+       assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
+}
+
+static const struct {
+       const char *name;
+       uint32_t type;
+       uint32_t subtype;
+} name_to_type_mapping[] = {
+       { "VS_UNIT",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VS_STATE},
+       { "GS_UNIT",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_GS_STATE},
+       { "CLIP_UNIT",  AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CL_STATE},
+       { "SF_UNIT",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_STATE},
+       { "WM_UNIT",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_WM_STATE},
+       { "CC_UNIT",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_STATE},
+       { "CLIP_VP",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CL_VP},
+       { "SF_VP",      AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_VP},
+       { "SF_SCISSOR_UNIT",
+         AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_SCISSOR_RECT},
+       { "CC_VP",      AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_VP},
+       { "SAMPLER",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SAMPLER_STATE},
+       { "SAMPLER_DEFAULT_COLOR", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SDC},
+       { "VS_PROG",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
+       { "GS_PROG",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
+       { "CLIP_PROG",  AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
+       { "SF_PROG",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
+       { "WM_PROG",    AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
+       { "BLEND_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_BLEND_STATE},
+       { "DEPTH_STENCIL_STATE",
+         AUB_TRACE_TYPE_GENERAL, AUB_TRACE_DEPTH_STENCIL_STATE},
+       { "COLOR_CALC_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_STATE},
+       { "SS_SURF_BIND", AUB_TRACE_TYPE_SURFACE, AUB_TRACE_BINDING_TABLE},
+       { "SS_SURFACE", AUB_TRACE_TYPE_SURFACE, AUB_TRACE_SURFACE_STATE},
+       { "temporary VBO", AUB_TRACE_TYPE_VERTEX_BUFFER, 0},
+       { "CURBE",      AUB_TRACE_TYPE_CONSTANT_URB, 0},
+       { "VS constant_bo", AUB_TRACE_TYPE_CONSTANT_BUFFER, 0},
+       { "WM constant_bo", AUB_TRACE_TYPE_CONSTANT_BUFFER, 0},
+       { "INTERFACE_DESC", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_INTERFACE_DESC},
+       { "VLD_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VLD_STATE},
+       { "VFE_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VFE_STATE},
+       { "IT_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_IT_STATE},
+       { "DI_SAMPLE_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_DI_SAMPLE_STATE},
+       { "IEF_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_IEF_STATE},
+       { "AVS_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_AVS_STATE},
+};
+
+static void
+aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
+                     uint32_t offset, uint32_t size)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+
+       aub_out(bufmgr_gem,
+               CMD_AUB_TRACE_HEADER_BLOCK |
+               (5 - 2));
+       aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
+       aub_out(bufmgr_gem, subtype);
+       aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
+       aub_out(bufmgr_gem, size);
+       aub_write_bo_data(bo, offset, size);
+}
+
+static void
+aub_write_bo(drm_intel_bo *bo)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+       uint32_t type = AUB_TRACE_TYPE_NOTYPE;
+       uint32_t subtype = 0;
+       uint32_t block_size;
+       uint32_t offset;
+       unsigned int i;
+
+       aub_bo_get_address(bo);
+
+       for (i = 0; i < ARRAY_SIZE(name_to_type_mapping); i++) {
+               if (strcmp(bo_gem->name,
+                          name_to_type_mapping[i].name) == 0) {
+                       type = name_to_type_mapping[i].type;
+                       subtype = name_to_type_mapping[i].subtype;
+                       break;
+               }
+       }
+
+       if (type == 0) {
+               DBG("Failed to find type for object %s(size: 0x%lx, aub_offset: 0x%08x)\n",
+                      bo_gem->name, bo->size, bo_gem->aub_offset);
+       }
+
+
+       /* Break up large objects into multiple writes.  Otherwise a
+        * 128kb VBO would overflow the 16 bits of size field in the
+        * packet header and everything goes badly after that.
+        */
+       for (offset = 0; offset < bo->size; offset += block_size) {
+               block_size = bo->size - offset;
+
+               if (block_size > 2 * 4096)
+                       block_size = 2 * 4096;
+
+               aub_write_trace_block(bo, type, subtype,
+                                     offset, block_size);
+       }
+}
+
+/*
+ * Make a ring buffer on the fly and dump it
+ */
+static void
+aub_generate_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
+                         uint32_t batch_buffer, unsigned int flags)
+{
+       uint32_t ringbuffer[1024];
+       int ring = 0;
+
+       switch (flags) {
+       case I915_EXEC_RENDER:
+       case I915_EXEC_DEFAULT:
+               ring = AUB_TRACE_TYPE_RING_PRB0;
+               break;
+       case I915_EXEC_BSD:
+               ring = AUB_TRACE_TYPE_RING_PRB1;
+               break;
+       case I915_EXEC_BLT:
+               ring = AUB_TRACE_TYPE_RING_PRB2;
+               break;
+       }
+
+       aub_out(bufmgr_gem,
+               CMD_AUB_TRACE_HEADER_BLOCK |
+               (5 - 2));
+       aub_out(bufmgr_gem,
+               AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
+       aub_out(bufmgr_gem, 0); /* general/surface subtype */
+       aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
+       aub_out(bufmgr_gem, 4096);
+
+       /* Do make a ring buffer here */
+       memset(ringbuffer, AUB_MI_NOOP, sizeof(ringbuffer));
+       ringbuffer[0] = AUB_MI_BATCH_BUFFER_START;
+       ringbuffer[1] = batch_buffer;
+
+       /* FIXME: Need some flush operations here? */
+
+       aub_out_data(bufmgr_gem, ringbuffer, 4096);
+
+       /* Update offset pointer */
+       bufmgr_gem->aub_offset += 4096;
+}
+
+static void
+aub_dump_bmp(drm_intel_bufmgr_gem *bufmgr_gem)
+{
+       struct drm_intel_aub_bmp *p = bufmgr_gem->aub_bmp;
+
+       while(p) {
+               aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
+               aub_out(bufmgr_gem, (p->bmp.y_offset << 16) | p->bmp.x_offset);
+               aub_out(bufmgr_gem, (p->bmp.format << 24) |
+                                   (p->bmp.bits_per_pixel << 16) | p->bmp.pitch);
+               aub_out(bufmgr_gem, (p->bmp.height << 16) | p->bmp.width);
+               /* surface bo should already be written out */
+               assert(((drm_intel_bo_gem *)p->bo)->aub_offset != 0);
+               aub_out(bufmgr_gem, ((drm_intel_bo_gem *)p->bo)->aub_offset + p->offset);
+               aub_out(bufmgr_gem, (p->bmp.tiling << 2) | (p->bmp.tiling_walk_y << 3));
+
+               bufmgr_gem->aub_bmp = p->next;
+               free(p);
+               p = bufmgr_gem->aub_bmp;
+       }
+}
+
+static void
+aub_exec(drm_intel_bo *bo, unsigned int flags)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+       int i;
+
+       if (!bufmgr_gem->aub_file)
+               return;
+
+       /* Write out all but the batchbuffer to AUB memory */
+       for (i = 0; i < bufmgr_gem->exec_count - 1; i++) {
+               if (bufmgr_gem->exec_bos[i] != bo)
+                       aub_write_bo(bufmgr_gem->exec_bos[i]);
+       }
+
+       aub_bo_get_address(bo);
+
+       /* Dump the batchbuffer. */
+       aub_out(bufmgr_gem,
+               CMD_AUB_TRACE_HEADER_BLOCK |
+               (5 - 2));
+       aub_out(bufmgr_gem,
+               AUB_TRACE_MEMTYPE_GTT | AUB_TRACE_TYPE_BATCH | AUB_TRACE_OP_DATA_WRITE);
+       aub_out(bufmgr_gem, 0); /* general/surface subtype */
+       aub_out(bufmgr_gem, bo_gem->aub_offset);
+       aub_out(bufmgr_gem, bo_gem->bo.size);
+       aub_write_bo_data(bo, 0, bo_gem->bo.size);
+
+       /* Dump ring buffer */
+       aub_generate_ringbuffer(bufmgr_gem, bo_gem->aub_offset, flags);
+
+       /* Dump BMP file for any requested surface */
+       aub_dump_bmp(bufmgr_gem);
+
+       fflush(bufmgr_gem->aub_file);
+
+       /*
+        * One frame has been dumped. So reset the aub_offset for the next frame.
+        *
+        * FIXME: Can we do this?
+        */
+       bufmgr_gem->aub_offset = 0x10000;
+}
+
+/*
+ * Stop dumping data to aub file
+ */
+void drm_intel_bufmgr_gem_stop_aubfile(drm_intel_bufmgr *bufmgr)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
+
+       pthread_mutex_lock(&bufmgr_gem->lock);
+       bufmgr_gem->aub_file = NULL;
+       pthread_mutex_unlock(&bufmgr_gem->lock);
+}
+
+void drm_intel_bufmgr_gem_set_aubfile(drm_intel_bufmgr *bufmgr, FILE *file)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
+       int entry = 0x3; /* uc/valid GTT */
+       int i;
+
+       if (!file)
+               return;
+
+       pthread_mutex_lock(&bufmgr_gem->lock);
+
+       bufmgr_gem->aub_file = file;
+
+       /* Start from 0x10000, since the address below is used for GTT entry building */
+       bufmgr_gem->aub_offset = 0x10000;
+
+       /* Start with a (required) version packet. */
+       aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
+       aub_out(bufmgr_gem,
+               (4 << AUB_HEADER_MAJOR_SHIFT) |
+               (0 << AUB_HEADER_MINOR_SHIFT));
+       for (i = 0; i < 8; i++) {
+               aub_out(bufmgr_gem, 0); /* app name */
+       }
+       aub_out(bufmgr_gem, 0); /* timestamp */
+       aub_out(bufmgr_gem, 0); /* timestamp */
+       aub_out(bufmgr_gem, 0); /* comment len */
+
+       /* Set up the GTT. The max we can handle is 256M.
+        * Needs improvement: dynamically alloc/write a GTT entry
+        * block for each bo, so the AubList output won't contain the
+        * whole GTT entry block up front, making it easier to parse.
+        */
+       for (i = 0x000; i < 0x10000; i += 4, entry += 0x1000) {
+               aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2));
+               aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE);
+               aub_out(bufmgr_gem, 0);
+               aub_out(bufmgr_gem, i);
+               aub_out(bufmgr_gem, 4);
+               aub_out(bufmgr_gem, entry);
+       }
+
+       pthread_mutex_unlock(&bufmgr_gem->lock);
+}
+
+int drm_intel_gem_aub_dump_bmp(drm_intel_bufmgr *bufmgr,
+                              drm_intel_bo *bo, unsigned int offset,
+                              struct drm_intel_aub_surface_bmp *bmp)
+{
+       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
+       struct drm_intel_aub_bmp *aub_bmp, *p, *last;
+
+       aub_bmp = malloc(sizeof(*aub_bmp));
+
+       aub_bmp->bo = bo;
+       aub_bmp->offset = offset;
+       memcpy(&aub_bmp->bmp, bmp, sizeof(*bmp));
+       aub_bmp->next = NULL;
+
+       pthread_mutex_lock(&bufmgr_gem->lock);
+
+       /* Insert last */
+       p = last = bufmgr_gem->aub_bmp;
+       while (p) {
+               last = p;
+               p = p->next;
+       }
+       if (last == bufmgr_gem->aub_bmp)
+           bufmgr_gem->aub_bmp = aub_bmp;
+       else
+           last->next = aub_bmp;
+
+       pthread_mutex_unlock(&bufmgr_gem->lock);
+
+       return 0;
+}
+
 static unsigned int
 drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
 
@@ -1624,6 +2024,8 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
        execbuf.rsvd1 = 0;
        execbuf.rsvd2 = 0;
 
+       aub_exec(bo, flags);
+
        ret = drmIoctl(bufmgr_gem->fd,
                       DRM_IOCTL_I915_GEM_EXECBUFFER2,
                       &execbuf);
-- 
1.7.2.3

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to