Preliminary code for shader debugging. This provides a trace-like
implementation that captures EU architectural state at each instruction.

The code currently only supports the most basic set of state and
features.

Signed-off-by: Ben Widawsky <[email protected]>
---
 configure.ac                             |    1 +
 debugger/Makefile.am                     |   13 +
 debugger/debug_rdata.c                   |  141 +++++++++
 debugger/eudb.c                          |  493 ++++++++++++++++++++++++++++++
 debugger/sr_loader.c                     |  118 +++++++
 debugger/system_routine/Makefile         |   69 +++++
 debugger/system_routine/eviction_macro.c |   48 +++
 debugger/system_routine/pre_cpp.py       |  123 ++++++++
 debugger/system_routine/sr.g4a           |  277 +++++++++++++++++
 lib/debug.h                              |   92 ++++++
 10 files changed, 1375 insertions(+), 0 deletions(-)
 create mode 100644 debugger/Makefile.am
 create mode 100644 debugger/debug_rdata.c
 create mode 100644 debugger/eudb.c
 create mode 100644 debugger/sr_loader.c
 create mode 100644 debugger/system_routine/Makefile
 create mode 100644 debugger/system_routine/eviction_macro.c
 create mode 100755 debugger/system_routine/pre_cpp.py
 create mode 100644 debugger/system_routine/sr.g4a
 create mode 100644 lib/debug.h

diff --git a/configure.ac b/configure.ac
index 4eb1c48..d2b30dc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -98,5 +98,6 @@ AC_CONFIG_FILES([
        scripts/Makefile
        tests/Makefile
        tools/Makefile
+       debugger/Makefile
 ])
 AC_OUTPUT
diff --git a/debugger/Makefile.am b/debugger/Makefile.am
new file mode 100644
index 0000000..32df7bf
--- /dev/null
+++ b/debugger/Makefile.am
@@ -0,0 +1,13 @@
+SUBDIRS=system_routine
+
+bin_PROGRAMS = \
+       eudb \
+       sr_loader \
+       debug_rdata \
+       $(NULL)
+
+LDADD = ../lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS)
+
+AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(WARN_CFLAGS) \
+       -I$(srcdir)/.. \
+       -I$(srcdir)/../lib
diff --git a/debugger/debug_rdata.c b/debugger/debug_rdata.c
new file mode 100644
index 0000000..de60775
--- /dev/null
+++ b/debugger/debug_rdata.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <[email protected]>
+ *
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "intel_gpu_tools.h"
+
+/*
+ * Raw per-thread EU debug data: three 32-bit dwords, filled in by
+ * collect_rdata() and decoded through the bit-field views below.
+ *
+ * Every bit-field uses a 32-bit base type. With 8/16-bit base types a field
+ * is not allowed to straddle its storage unit, so the compiler inserts
+ * padding: the ud1/ud2 fields then stop lining up with the dword they alias
+ * and ud2 grows beyond 32 bits.
+ */
+struct eu_rdata {
+       /* Dword 0: dependency flags, thread identification and status. */
+       union {
+               struct {
+                       uint32_t sendc_dep : 1;
+                       uint32_t swh_dep : 1;
+                       uint32_t pwc_dep : 1;
+                       uint32_t n2_dep : 1;
+                       uint32_t n1_dep : 1;
+                       uint32_t n0_dep : 1;
+                       uint32_t flag1_dep : 1;
+                       uint32_t flag0_dep : 1;
+                       uint32_t indx_dep : 1;
+                       uint32_t mrf_dep : 1;
+                       uint32_t dst_dep : 1;
+                       uint32_t src2_dep : 1;
+                       uint32_t src1_dep : 1;
+                       uint32_t src0_dep : 1;
+                       uint32_t mp_dep_pin : 1;
+                       uint32_t sp_dep_pin : 1;
+                       uint32_t fftid : 8;
+                       uint32_t ffid : 4;
+                       uint32_t instruction_valid : 1;
+                       uint32_t thread_status : 3;
+               };
+               uint32_t dword;
+       } ud0;
+
+       /* Dword 1: register addresses of the current instruction. */
+       union {
+               struct {
+                       uint32_t mrf_addr : 4;
+                       uint32_t dst_addr : 7;
+                       uint32_t src2_addr : 7;
+                       uint32_t src1_addr : 7;
+                       uint32_t src0_addr : 7;
+               };
+               uint32_t dword;
+       } ud1;
+
+       /* Dword 2: instruction pointer and opcode. */
+       union {
+               struct {
+                       uint32_t exip : 12;
+                       uint32_t opcode : 7;
+                       uint32_t pwc : 8;
+                       uint32_t instruction_valid : 1;
+                       uint32_t mbz : 4;
+               };
+               uint32_t dword;
+       } ud2;
+};
+
+/* Printable names, indexed by the 3-bit ud0.thread_status field. */
+static const char *const thread_status[] = {
+       "INVALID", "invalid/no thread", "standby (dependency)", "INVALID",
+       "Executing", "INVALID", "INVALID", "INVALID"
+};
+
+/*
+ * Read the three debug dwords of thread @tid on execution unit @eu.
+ *
+ * Each dword is selected by writing (eu << 16 | index << 8) to register
+ * 0x7800, with index = 3 * tid + {0, 1, 2}, and is then read back from
+ * register 0x7840.
+ */
+static struct eu_rdata
+collect_rdata(int eu, int tid)
+{
+       const int eu_select = eu << 16;
+       const int first = 3 * tid;
+       struct eu_rdata result;
+
+       intel_register_write(0x7800, eu_select | (first << 8));
+       result.ud0.dword = intel_register_read(0x7840);
+
+       intel_register_write(0x7800, eu_select | ((first + 1) << 8));
+       result.ud1.dword = intel_register_read(0x7840);
+
+       intel_register_write(0x7800, eu_select | ((first + 2) << 8));
+       result.ud2.dword = intel_register_read(0x7840);
+
+       return result;
+}
+
+/*
+ * Print a subset of @rdata to stdout, one tab-indented line per item: the
+ * thread status name, three dependency flags, the register addresses, and
+ * the instruction pointer and opcode.
+ */
+static void
+print_rdata(struct eu_rdata rdata)
+{
+       const char *status = thread_status[rdata.ud0.thread_status];
+
+       /* dword 0 */
+       printf("\t%s\n", status);
+       printf("\tn1_dep: %d\n", rdata.ud0.n1_dep);
+       printf("\tpwc_dep: %d\n", rdata.ud0.pwc_dep);
+       printf("\tswh_dep: %d\n", rdata.ud0.swh_dep);
+
+       /* dword 1 */
+       printf("\tsource 0 %x\n", rdata.ud1.src0_addr);
+       printf("\tsource 1 %x\n", rdata.ud1.src1_addr);
+       printf("\tsource 2 %x\n", rdata.ud1.src2_addr);
+       printf("\tdest  %x\n", rdata.ud1.dst_addr);
+       printf("\tmrf  %x\n", rdata.ud1.mrf_addr);
+
+       /* dword 2 */
+       printf("\tIP: %x\n", rdata.ud2.exip);
+       printf("\topcode: %x\n", rdata.ud2.opcode);
+}
+
+/*
+ * Walk EUs 0..14 and threads 0..4 of each, and print every thread whose
+ * status is 2 ("standby (dependency)") or 4 ("Executing").
+ */
+static void
+find_stuck_threads(void)
+{
+       enum { NUM_EUS = 15, THREADS_PER_EU = 5 };
+       enum { STATUS_STANDBY = 2, STATUS_EXECUTING = 4 };
+       int eu, tid;
+
+       for (eu = 0; eu < NUM_EUS; eu++) {
+               for (tid = 0; tid < THREADS_PER_EU; tid++) {
+                       struct eu_rdata rdata = collect_rdata(eu, tid);
+                       int status = rdata.ud0.thread_status;
+
+                       if (status != STATUS_STANDBY &&
+                           status != STATUS_EXECUTING)
+                               continue;
+
+                       printf("%d %d:\n", eu, tid);
+                       print_rdata(rdata);
+               }
+       }
+}
+
+/*
+ * Entry point: set up register access, report every thread that is in
+ * standby or executing state, then tear register access down again.
+ * Command line arguments are currently unused.
+ *
+ * Returns 0 on success, 1 if register access could not be initialized.
+ */
+int main(int argc, char *argv[]) {
+       struct pci_device *pci_dev;
+
+       (void)argc;
+       (void)argv;
+
+       pci_dev = intel_get_pci_device();
+
+       /* Do not touch the registers if access setup failed. */
+       if (intel_register_access_init(pci_dev, 1, 2) != 0) {
+               fprintf(stderr, "Failed to initialize register access\n");
+               return 1;
+       }
+
+       find_stuck_threads();
+
+       intel_register_access_fini();
+       return 0;
+}
diff --git a/debugger/eudb.c b/debugger/eudb.c
new file mode 100644
index 0000000..b90e7a7
--- /dev/null
+++ b/debugger/eudb.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <[email protected]>
+ *
+ * Notes:
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_chipset.h"
+#include "intel_bufmgr.h"
+#include "intel_gpu_tools.h"
+#include "intel_batchbuffer.h"
+#include "debug.h"
+
+/*
+ * Size in bytes of the scratch area of each thread. This should come from a
+ * shared interface with the DRI client that is submitting the program to be
+ * debugged.
+ */
+#ifndef PER_THREAD_SCRATCH
+       #define PER_THREAD_SCRATCH      (1 << 20)
+#endif
+
+/* EU attention register and the register used to clear attention bits. */
+#define EU_ATT         0x7810
+#define EU_ATT_CLR     0x7830
+
+/*
+ * Markers for table slots that belong to no EU thread. The values are
+ * parenthesized so the negative literal survives inside larger expressions.
+ */
+#define RSVD_EU (-1)
+#define RSVD_THREAD (-1)
+#define RSVD_ID EUID(-1, -1, -1)
+
+/*
+ * Failure codes; functions return them negated. Numbering starts at 1 so that
+ * no negated code collapses to 0, which callers treat as success.
+ */
+enum {
+       EBAD_SHMEM = 1,
+       EBAD_PROTOCOL,
+       EBAD_MAGIC,
+       EBAD_WRITE
+};
+
+/* One slot of the attention-bit table: the EU thread behind one bit. */
+struct debuggee {
+       int euid;       /* EU number, -1 for a reserved slot */
+       int tid;        /* thread number within the EU, -1 for a reserved slot */
+       int fd;         /* dump file for this thread, -1 until opened */
+       int clr;        /* bit position written to the clear register */
+       uint32_t reg;   /* attention register this slot is initialized with */
+};
+
+/* Per-device description; eu_info points at the one selected at startup. */
+struct debugger {
+       struct debuggee *debuggees;     /* table indexed by attention bit */
+       int num_threads;        /* number of table slots, reserved ones included */
+       int real_num_threads;   /* presumably the non-reserved slots - TODO confirm */
+       int threads_per_eu;
+} *eu_info;
+
+/* Global tool state shared by the functions of this file. */
+drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+drm_intel_bo *scratch_bo;      /* scratch buffer opened by name in main() */
+
+int handle;
+int drm_fd;
+int debug_fd = 0;              /* 0 means the debug dump file is not open yet */
+const char *debug_file = "dump_debug.bin";
+int debug;                     /* set by -d: print and dump handshake failures */
+int clear_waits;               /* set by -c: only clear pending attention bits */
+int shutting_down = 0;         /* set by the SIGINT handler */
+
+/*
+ * The docs are wrong about the attention clear bits. The clear bits are
+ * provided as part of the structure in case they change in future generations.
+ *
+ * EUID() builds a slot that lives in the first attention register, EUID2() one
+ * in the second register (EU_ATT + 4). Both start with no dump file (fd -1).
+ */
+#define EUID(eu, td, clear) \
+       { .euid = eu, .tid = td, .reg = EU_ATT, .fd = -1, .clr = clear }
+#define EUID2(eu, td, clear) \
+       { .euid = eu, .tid = td, .reg = EU_ATT + 4, .fd = -1, .clr = clear }
+/* Attention-bit table used for the GT1 parts; the index is the bit number. */
+struct debuggee gt1_debug_ids[] = {
+       RSVD_ID, RSVD_ID,
+       RSVD_ID, EUID(6, 3, 28), EUID(6, 2, 27), EUID(6, 1, 26), EUID(6, 0, 25),
+       RSVD_ID, EUID(5, 3, 23), EUID(5, 2, 22), EUID(5, 1, 21), EUID(5, 0, 20),
+       RSVD_ID, EUID(4, 3, 18), EUID(4, 2, 17), EUID(4, 1, 16), EUID(4, 0, 15),
+       RSVD_ID, EUID(2, 3, 13), EUID(2, 2, 12), EUID(2, 1, 11), EUID(2, 0, 10),
+       RSVD_ID, EUID(1, 3, 8), EUID(1, 2, 7), EUID(1, 1, 6), EUID(1, 0, 5),
+       RSVD_ID, EUID(0, 3, 3), EUID(0, 2, 2), EUID(0, 1, 1), EUID(0, 0, 0)
+};
+
+/*
+ * Attention-bit table used for the GT2 parts; the index is the bit number.
+ * Slots 0-31 live in the first attention register (EUID), slots 32-63 in the
+ * second one (EUID2).
+ */
+struct debuggee gt2_debug_ids[] = {
+       EUID(8, 1, 31), EUID(8, 0, 30),
+       EUID(6, 4, 29), EUID(6, 3, 28), EUID(6, 2, 27), EUID(6, 1, 26),
+       EUID(6, 0, 25),
+       EUID(5, 4, 24), EUID(5, 3, 23), EUID(5, 2, 22), EUID(5, 1, 21),
+       EUID(5, 0, 20),
+       EUID(4, 4, 19), EUID(4, 3, 18), EUID(4, 2, 17), EUID(4, 1, 16),
+       EUID(4, 0, 15),
+       EUID(2, 4, 14), EUID(2, 3, 13), EUID(2, 2, 12), EUID(2, 1, 11),
+       EUID(2, 0, 10),
+       EUID(1, 4, 9), EUID(1, 3, 8), EUID(1, 2, 7), EUID(1, 1, 6),
+       EUID(1, 0, 5),
+       EUID(0, 4, 4), EUID(0, 3, 3), EUID(0, 2, 2), EUID(0, 1, 1),
+       EUID(0, 0, 0),
+       RSVD_ID, RSVD_ID, RSVD_ID, RSVD_ID,
+       EUID2(14, 4, 27), EUID2(14, 3, 26), EUID2(14, 2, 25),
+       EUID2(14, 1, 24), EUID2(14, 0, 23),
+       EUID2(13, 4, 22), EUID2(13, 3, 21), EUID2(13, 2, 20),
+       EUID2(13, 1, 19), EUID2(13, 0, 18),
+       EUID2(12, 4, 17), EUID2(12, 3, 16), EUID2(12, 2, 15),
+       EUID2(12, 1, 14), EUID2(12, 0, 13),
+       EUID2(10, 4, 12), EUID2(10, 3, 11), EUID2(10, 2, 10),
+       EUID2(10, 1, 9), EUID2(10, 0, 8),
+       EUID2(9, 4, 7), EUID2(9, 3, 6), EUID2(9, 2, 5), EUID2(9, 1, 4),
+       EUID2(9, 0, 3),
+       EUID2(8, 4, 2), EUID2(8, 3, 1), EUID2(8, 2, 0)
+};
+
+/* Device description selected by identify_device() for the GT1 device IDs. */
+struct debugger gt1 = {
+       .debuggees = gt1_debug_ids,
+       .num_threads = 32,
+       .real_num_threads = 24,
+       .threads_per_eu = 4
+};
+
+/* Device description selected by identify_device() for the GT2 device IDs. */
+struct debugger gt2 = {
+       .debuggees = gt2_debug_ids,
+       .num_threads = 64,
+       .real_num_threads = 60,
+       .threads_per_eu = 5
+};
+
+/*
+ * Write @count bytes from @buf to the debug dump file, creating the file on
+ * first use. debug_fd == 0 means "not opened yet", which is also what die()
+ * tests before closing it. Failures are reported on stderr and otherwise
+ * ignored, since the dump is a debugging aid only.
+ */
+static void
+dump_debug(void *buf, size_t count) {
+       if (!debug_fd) {
+               /*
+                * Owner read/write plus read for everyone else; S_IRWXO alone
+                * would grant access to "others" only.
+                */
+               debug_fd = open(debug_file, O_CREAT | O_WRONLY | O_TRUNC,
+                               S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+               if (debug_fd == -1) {
+                       perror(debug_file);
+                       debug_fd = 0;   /* try again on the next call */
+                       return;
+               }
+       }
+
+       if (write(debug_fd, buf, count) != (ssize_t)count)
+               perror("debug dump write");
+}
+
+/*
+ * Map scratch_bo and return the address of the mapping. A mapping failure
+ * trips the assertion.
+ */
+static volatile void *
+map_debug_buffer(void)
+{
+       const int err = drm_intel_bo_map(scratch_bo, 0);
+
+       assert(err == 0);
+       return scratch_bo->virtual;
+}
+
+/* Undo map_debug_buffer(): unmap scratch_bo. */
+static void
+unmap_debug_buffer(void) {
+       drm_intel_bo_unmap(scratch_bo);
+}
+
+/*
+ * Poll the two attention registers until at least one bit is set.
+ *
+ * @timeout:  number of polling rounds; a value <= 0 polls until attention is
+ *            raised or shutting_down is set.
+ * @out_bits: receives the set bit numbers (0-31 from the first register,
+ *            32-63 from the second); must have room for 64 entries.
+ *
+ * Returns the number of entries stored in @out_bits, 0 if none were seen.
+ */
+static int
+wait_for_attn(int timeout, int *out_bits) {
+       int step = 1;
+       int eus_waiting = 0;
+       int i, j;
+
+       if (timeout <= 0) {
+               /* step 0 keeps i below timeout forever */
+               timeout = 1;
+               step = 0;
+       }
+
+       for (i = 0; i < timeout; i += step) {
+               for (j = 0; j < 8; j += 4) {
+                       uint32_t attn = intel_register_read(EU_ATT + j);
+
+                       while (attn) {
+                               int bit = ffs(attn) - 1; /* ffs is 1 based */
+
+                               assert(bit >= 0);
+                               /* j is a byte offset; j * 8 is its bit offset */
+                               out_bits[eus_waiting] = bit + (j * 8);
+                               /* unsigned shift: 1 << 31 overflows an int */
+                               attn &= ~((uint32_t)1 << bit);
+                               eus_waiting++;
+                       }
+               }
+
+               if (intel_register_read(EU_ATT + 8) ||
+                   intel_register_read(EU_ATT + 0xc)) {
+                       fprintf(stderr, "Unknown attention bits\n");
+               }
+
+               if (eus_waiting || shutting_down)
+                       break;
+       }
+
+       return eus_waiting;
+}
+
+/* Accessors for the table slot that belongs to attention bit @bit. */
+#define eu_fd(bit) eu_info->debuggees[bit].fd
+#define eu_id(bit) eu_info->debuggees[bit].euid
+#define eu_tid(bit) eu_info->debuggees[bit].tid
+
+/*
+ * Find the per-thread scratch area in @buf that was written by the thread
+ * behind attention bit @bit.
+ *
+ * @buf is treated as an array of PER_THREAD_SCRATCH-sized areas, each
+ * starting with a struct eu_state. The owner is identified from sr0: thread
+ * id in bits 2:0, EU id in bits 11:8.
+ *
+ * Returns the matching state, or NULL if no area matches (including when the
+ * device has no threads to scan).
+ */
+static struct eu_state *
+find_eu_shmem(int bit, volatile uint8_t *buf) {
+       struct per_thread_data {
+               uint8_t ____[PER_THREAD_SCRATCH];
+       }__attribute__((packed)) *data;
+       int i;
+
+       data = (struct per_thread_data *)buf;
+       for (i = 0; i < eu_info->num_threads; i++) {
+               struct eu_state *eu = (struct eu_state *)&data[i];
+               int mem_tid = eu->sr0 & 0x7;
+               int mem_euid = (eu->sr0 >> 8) & 0xf;
+
+               if (mem_tid == eu_tid(bit) && mem_euid == eu_id(bit))
+                       return eu;
+       }
+
+       return NULL;
+}
+
+/* Compare / copy one register-sized (sizeof(grf)) block. */
+#define GRF_CMP(a, b) memcmp(a, b, sizeof(grf))
+#define GRF_CPY(a, b) memcpy(a, b, sizeof(grf))
+
+/*
+ * Validate the state block written by an EU thread and acknowledge it.
+ *
+ * Checks the protocol version and the state magic; on success the magic is
+ * replaced by cpu_ack and sr0 is overwritten with the reserved EU/thread
+ * marker so the block no longer matches any real thread.
+ *
+ * Returns 0 on success, -EBAD_PROTOCOL or -EBAD_MAGIC on mismatch. With
+ * debugging enabled the offending block is also written to the debug dump.
+ */
+static int
+handshake(struct eu_state *eu) {
+       if (GRF_CMP(eu->version, protocol_version)) {
+               if (debug) {
+                       printf("Bad EU protocol version %x %x\n",
+                               ((uint32_t *)&eu->version)[0],
+                               DEBUG_PROTOCOL_VERSION);
+                       dump_debug((void *)eu, sizeof(*eu));
+               }
+               return -EBAD_PROTOCOL;
+       }
+
+       if (GRF_CMP(eu->state_magic, eu_msg)) {
+               if (debug) {
+                       printf("Bad EU state magic %x %x\n",
+                               ((uint32_t *)&eu->state_magic)[0],
+                               ((uint32_t *)&eu->state_magic)[1]);
+                       dump_debug((void *)eu, sizeof(*eu));
+               }
+               return -EBAD_MAGIC;
+       }
+
+       GRF_CPY(eu->state_magic, cpu_ack);
+
+       /*
+        * Same all-ones value as before, but built from unsigned operands:
+        * left-shifting a negative int is undefined.
+        */
+       eu->sr0 = ((uint32_t)RSVD_EU << 8) | (uint32_t)RSVD_THREAD;
+       return 0;
+}
+
+/*
+ * Save the state of the thread behind attention bit @bit.
+ *
+ * Opens the per-thread dump file on first use, locates the thread's state
+ * block in the scratch mapping @buf, performs the handshake and appends the
+ * block to the dump file.
+ *
+ * Returns 0 on success and a negative value on failure: -1 if the dump file
+ * cannot be opened, otherwise a negated EBAD_* code.
+ */
+static int
+collect_data(int bit, volatile uint8_t *buf) {
+       struct eu_state *eu;
+       ssize_t num;
+       int ret;
+
+       assert(eu_id(bit) != RSVD_EU);
+
+       if (eu_fd(bit) == -1) {
+               char name[128];
+
+               snprintf(name, sizeof(name), "dump_eu_%02d_%d.bin",
+                        eu_id(bit), eu_tid(bit));
+               /* S_IRWXO alone would grant access to "others" only. */
+               eu_fd(bit) = open(name, O_CREAT | O_WRONLY | O_TRUNC,
+                                 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+               if (eu_fd(bit) == -1)
+                       return -1;
+       }
+
+       eu = find_eu_shmem(bit, buf);
+
+       if (eu == NULL) {
+               if (debug)
+                       printf("Bad offset %d %d\n", eu_id(bit), eu_tid(bit));
+               return -EBAD_SHMEM;
+       }
+
+       ret = handshake(eu);
+       if (ret)
+               return ret;
+
+       num = write(eu_fd(bit), (void *)eu, sizeof(*eu));
+       if (num != (ssize_t)sizeof(*eu)) {
+               perror("unhandled write failure");
+               /* negated like every other failure code of this file */
+               return -EBAD_WRITE;
+       }
+
+       return 0;
+}
+
+/*
+ * Clear attention bit @bit (0-63) by writing 0 and then the clear bit to the
+ * matching clear register.
+ *
+ * The clear bit normally comes from the device table so it can change per
+ * generation. Reserved slots carry no clear bit (-1); for those the
+ * positional rule of the earlier implementation, 31 - (bit % 32), is used so
+ * that the shift count is always valid.
+ */
+static void
+clear_attn(int bit) {
+       uint32_t reg = EU_ATT_CLR + (bit / 32) * 4;
+       int bit_to_clear = eu_info->debuggees[bit].clr;
+
+       if (bit_to_clear < 0)
+               bit_to_clear = 31 - (bit % 32);
+
+       intel_register_write(reg, 0);
+       /* unsigned shift: 1 << 31 overflows an int */
+       intel_register_write(reg, (uint32_t)1 << bit_to_clear);
+}
+
+/*
+ * SIGINT handler: ask the main loop to stop. The message is emitted with
+ * write() because printf() is not async-signal-safe.
+ */
+static void
+shutdown(int sig) {
+       static const char msg[] = "Shutting down...\n";
+       ssize_t ignored;
+
+       (void)sig;
+       shutting_down = 1;
+
+       /* Nothing useful can be done about a failed write in a handler. */
+       ignored = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
+       (void)ignored;
+}
+
+/*
+ * Release everything and exit with status @reason: zero both attention clear
+ * registers, close the debug dump and all per-thread dump files, unmap the
+ * scratch buffer if there is one, and shut register access down.
+ */
+static void
+die(int reason) {
+       int i = 0;
+
+       intel_register_write(EU_ATT_CLR, 0);
+       intel_register_write(EU_ATT_CLR + 4, 0);
+
+       if (debug_fd)
+               close(debug_fd);
+
+       for (i = 0; i < eu_info->num_threads; i++) {
+               if (eu_info->debuggees[i].fd != -1)
+                       close(eu_info->debuggees[i].fd);
+       }
+
+       /* The clear-only (-c) path never opens the scratch buffer. */
+       if (scratch_bo)
+               unmap_debug_buffer();
+
+       intel_register_access_fini();
+       exit(reason);
+}
+
+/*
+ * Point eu_info at the description matching PCI device id @devid.
+ *
+ * Returns 0 when the id is known, 1 otherwise (eu_info is then untouched).
+ */
+static int
+identify_device(int devid)
+{
+       struct debugger *match;
+
+       switch (devid) {
+       case PCI_CHIP_SANDYBRIDGE_GT1:
+       case PCI_CHIP_SANDYBRIDGE_M_GT1:
+       case PCI_CHIP_SANDYBRIDGE_S:
+               match = &gt1;
+               break;
+
+       case PCI_CHIP_SANDYBRIDGE_GT2:
+       case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
+       case PCI_CHIP_SANDYBRIDGE_M_GT2:
+       case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
+               match = &gt2;
+               break;
+
+       default:
+               return 1;
+       }
+
+       eu_info = match;
+       return 0;
+}
+
+/*
+ * Print, for every struct eu_state record stored in @file_name, the third
+ * dword of its cr0 field on a line prefixed with "AIP: ".
+ *
+ * Errors are reported on stderr; the function always cleans up after itself.
+ */
+static void
+parse_data(const char *file_name) {
+       struct eu_state *eu_state = NULL;
+       struct stat st;
+       int fd = -1;
+       int ret, i, elements;
+
+       fd = open(file_name, O_RDONLY);
+       if (fd == -1) {
+               perror("open");
+               goto out;
+       }
+
+       ret = fstat(fd, &st);
+       if (ret == -1) {
+               perror("fstat");
+               goto out;
+       }
+
+       elements = st.st_size / sizeof(struct eu_state);
+       if (elements == 0) {
+               fprintf(stderr, "File not big enough for 1 entry\n");
+               goto out;
+       }
+
+       eu_state = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+       if (eu_state == MAP_FAILED) {
+               perror("mmap");
+               /* MAP_FAILED is not NULL; keep cleanup from unmapping it */
+               eu_state = NULL;
+               goto out;
+       }
+
+       for (i = 0; i < elements; i++) {
+               printf("AIP: ");
+               printf("%x\n", ((uint32_t *)eu_state[i].cr0)[2]);
+       }
+out:
+       if (eu_state)
+               munmap(eu_state, st.st_size);
+       if (fd != -1)
+               close(fd);
+}
+
+/*
+ * Entry point of the EU debugger.
+ *
+ * Options:
+ *   -c        only clear pending attention bits, then exit
+ *   -d        print and dump handshake failures
+ *   -r FILE   print the records of a previously written dump file and exit
+ *   -p ID     use PCI device id ID instead of the one probed from hardware
+ *   -h, -?    exit
+ *
+ * Without -c the tool reads the global name of the scratch buffer from stdin
+ * and then services attention events until interrupted with Ctrl-C.
+ */
+int main(int argc, char* argv[]) {
+       struct pci_device *pci_dev;
+       volatile uint8_t *scratch = NULL;
+       int bits[64];
+       int devid = -1, opt, ret;
+
+       /* "p:" - the device id option takes an argument */
+       while ((opt = getopt(argc, argv, "cdr:p:?h")) != -1) {
+               switch (opt) {
+               case 'c':
+                       clear_waits = 1;
+                       break;
+               case 'd':
+                       debug = 1;
+                       break;
+               case 'r':
+                       parse_data(optarg);
+                       exit(0);
+                       break;
+               case 'p':
+                       /* base 0 accepts decimal as well as 0x-prefixed ids */
+                       devid = (int)strtol(optarg, NULL, 0);
+                       break;
+               case '?':
+               case 'h':
+               default:
+                       exit(0);
+               }
+       }
+
+       pci_dev = intel_get_pci_device();
+       /* Probe the hardware id only when -p did not supply one. */
+       if (devid == -1)
+               devid = pci_dev->device_id;
+       if (identify_device(devid)) {
+               abort();
+       }
+
+       /* Call outside of assert() so NDEBUG builds still initialize. */
+       ret = intel_register_access_init(pci_dev, 1, 2);
+       assert(ret == 0);
+       (void)ret;
+
+       memset(bits, -1, sizeof(bits));
+
+       /*
+        * These events have to occur before the SR runs, or we need
+        * non-blocking versions of the functions.
+        */
+       if (!clear_waits) {
+               int scratch_name;
+
+               drm_fd = drm_open_any();
+               bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+               printf("scratch handle: ");
+               /* Read the whole number; fscanf returns EOF on closed input. */
+               if (fscanf(stdin, "%d", &scratch_name) != 1)
+                       exit(1);
+               scratch_bo = intel_bo_gem_create_from_name(bufmgr, "scratch",
+                                                          scratch_name);
+               if (scratch_bo == NULL) {
+                       fprintf(stderr, "Cannot open scratch buffer %d\n",
+                               scratch_name);
+                       exit(1);
+               }
+               signal(SIGINT, shutdown);
+               printf("Press Ctrl-C to stop\n");
+       } else {
+               while (wait_for_attn(20000, bits)) {
+                       clear_attn(bits[0]);
+                       memset(bits, -1, sizeof(bits));
+               }
+               die(0);
+       }
+
+       scratch = map_debug_buffer();
+       while (shutting_down == 0) {
+               int num_events, i;
+
+               memset(bits, -1, sizeof(bits));
+               num_events = wait_for_attn(-1, bits);
+               if (num_events == 0)
+                       break;
+
+               for (i = 0; i < num_events; i++) {
+                       assert(bits[i] < 64 && bits[i] >= 0);
+                       /* Leave the bit pending if its data was not saved. */
+                       if (collect_data(bits[i], scratch)) {
+                               bits[i] = -1;
+                               continue;
+                       }
+                       clear_attn(bits[i]);
+               }
+       }
+
+       die(0);
+       return 0;
+}
diff --git a/debugger/sr_loader.c b/debugger/sr_loader.c
new file mode 100644
index 0000000..4786895
--- /dev/null
+++ b/debugger/sr_loader.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <[email protected]>
+ *
+ */
+
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "drmtest.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+#include "system_routine/sr.txt"
+
+#ifdef USER_STATE_SETUP
+/* Buffer manager and batch buffer, used only by the user-space setup path. */
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+/*
+ * Emit and flush a 12-dword batch that programs STATE_BASE_ADDRESS and then
+ * STATE_SIP with a relocation to @sr_bo, followed by MI_FLUSH and MI_NOOP.
+ */
+static void
+upload_system_routine(drm_intel_bo *sr_bo) {
+       BEGIN_BATCH(12);
+       OUT_BATCH(BRW_STATE_BASE_ADDRESS | (8-2));
+       OUT_BATCH(0); /* General state base address */
+       OUT_BATCH(0); /* Surface state base address */
+       OUT_BATCH(0); /* Indirect object base address */
+       OUT_BATCH(1); /* Instruction base address */
+       OUT_BATCH(0); /* General state upper bound */
+       OUT_BATCH(0); /* Indirect object upper bound */
+       OUT_BATCH(1); /* Instruction access upper bound */
+       OUT_BATCH(BRW_STATE_SIP); /* STATE_SIP */
+       OUT_RELOC(sr_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+       OUT_BATCH(MI_FLUSH | 1 << 1);
+       OUT_BATCH(MI_NOOP);
+       ADVANCE_BATCH();
+       intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Copy the assembled system routine (gen_eu_bytes) into @bo.
+ *
+ * drm_intel_bo_subdata() writes into the buffer object; the previously used
+ * drm_intel_bo_get_subdata() reads from it and would have overwritten
+ * gen_eu_bytes with the contents of the freshly allocated buffer.
+ */
+static void
+copy_system_routine(drm_intel_bo *bo) {
+       int ret;
+
+       ret = drm_intel_bo_subdata(bo, 0, sizeof(gen_eu_bytes), gen_eu_bytes);
+       assert(ret == 0);
+       (void)ret;
+}
+#endif
+
+/*
+ * Set the global debug enable bit in INSTPM (the same bit is also written
+ * 16 positions higher) and the force-breakpoint bit in GEN6_TD_CTL, then
+ * assert that both bits read back as set.
+ */
+static void
+set_debug_state(void)
+{
+       const unsigned int debug_en = INSTPM_GLOBAL_DEBUG_EN;
+       const unsigned int force_bkpt = TD_CTL_FORCE_TD_BKPT;
+
+       intel_register_write(INSTPM, debug_en | (debug_en << 16));
+       intel_register_write(GEN6_TD_CTL, force_bkpt);
+
+       assert(intel_register_read(INSTPM) & debug_en);
+       assert(intel_register_read(GEN6_TD_CTL) & force_bkpt);
+}
+
+/*
+ * Entry point: enable EU debugging in the hardware and install the system
+ * routine, either through the kernel ioctl (default) or, when built with
+ * USER_STATE_SETUP, by uploading it from user space.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int main(int argc, char **argv)
+{
+       int fd, ret = 0;
+
+       (void)argc;
+       (void)argv;
+
+       fd = drm_open_any();
+       /* Validate the descriptor before anything uses it. */
+       if (fd < 0) {
+               fprintf(stderr, "Failed to open the DRM device\n");
+               return 1;
+       }
+
+       ret = intel_register_access_init(intel_get_pci_device(), 1, 2);
+       if (ret != 0) {
+               fprintf(stderr, "Failed to initialize register access\n");
+               close(fd);
+               return 1;
+       }
+       set_debug_state();
+
+#ifndef USER_STATE_SETUP
+/* I'd like to move state setup from the kernel, but it's not working yet. */
+       struct drm_intel_system_routine sysrout;
+
+       /* Go through uintptr_t so the pointer also converts on 32-bit builds. */
+       sysrout.sr_buffer = (__u64)(uintptr_t)gen_eu_bytes;
+       sysrout.size = sizeof(gen_eu_bytes);
+
+       ret = ioctl(fd, DRM_IOCTL_I915_SYSTEM_ROUTINE, &sysrout);
+       assert(ret == 0);
+#else
+
+       drm_intel_bo *bo;
+
+       bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+       drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+       batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+       bo = drm_intel_bo_alloc(bufmgr, "system routine",
+                               sizeof(gen_eu_bytes), 4096);
+       ret = drm_intel_bo_pin(bo, 4096);
+       copy_system_routine(bo);
+       upload_system_routine(bo);
+
+       intel_batchbuffer_free(batch);
+       drm_intel_bufmgr_destroy(bufmgr);
+       /* NOTE(review): spins forever, presumably to keep the process alive - confirm */
+       while(1);
+#endif
+       intel_register_access_fini();
+       close(fd);
+       return ret;
+}
diff --git a/debugger/system_routine/Makefile b/debugger/system_routine/Makefile
new file mode 100644
index 0000000..3da0f32
--- /dev/null
+++ b/debugger/system_routine/Makefile
@@ -0,0 +1,69 @@
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Authors:
+#    Ben Widawsky <[email protected]>
+
+# Builds sr.txt, the assembled system routine, from sr.g4a:
+#   pre_cpp.py -> C preprocessor -> gen4 assembler
+
+PRECPP=./pre_cpp.py
+CPP_FLAGS=-x assembler-with-cpp -P
+
+GEN_AS?=~/intel-gfx/intel-gen4asm/src/intel-gen4asm
+GEN_AS_FLAGS?=-g6 -a -b
+
+# Intermediate files of the sr.txt recipe. Fixed names are used because
+# calling mktemp while the Makefile is parsed left two stray temporary files
+# behind on every make invocation, "make clean" included.
+SR_PRE=sr.pre.tmp
+SR_CPP=sr.cpp.tmp
+
+GPU?=SANDYBRIDGE
+DEFINES+=-DGEN_ASM -D$(GPU) -I. -I../../lib
+
+sr.txt: sr.g4a eviction_macro evict.h
+	$(PRECPP) $^ > $(SR_PRE)
+	$(CPP) $(CPP_FLAGS) $(DEFINES) -o $(SR_CPP) $(SR_PRE)
+	$(GEN_AS) $(GEN_AS_FLAGS) $(SR_CPP) -o $@
+	$(RM) $(SR_PRE) $(SR_CPP)
+
+helper: sr.g4a eviction_macro
+	$(PRECPP) $^ > help
+	$(CPP) $(CPP_FLAGS) $(DEFINES) -o help2 help
+	$(GEN_AS) $(GEN_AS_FLAGS) help2 -o $@
+
+# Built from eviction_macro.c by make's implicit rule.
+eviction_macro :
+
+# Run the generator as a normal recipe command so it executes when the target
+# is rebuilt, not when the recipe text is expanded.
+evict.h : eviction_macro
+	./eviction_macro > $@
+
+all: sr.txt
+
+.PHONY : all clean distclean maintainer-clean
+clean :
+	$(RM) sr.txt evict.h eviction_macro $(SR_PRE) $(SR_CPP)
+
+distclean: clean
+	$(RM) help*
+
+maintainer-clean: clean
+
+EMPTY_AUTOMAKE_TARGETS = install install-data install-exec uninstall \
+                        install-dvi install-html install-info install-ps \
+                        install-pdf installdirs check installcheck \
+                        mostlyclean dvi pdf ps info html tags ctags
+.PHONY: $(EMPTY_AUTOMAKE_TARGETS)
+$(EMPTY_AUTOMAKE_TARGETS):
diff --git a/debugger/system_routine/eviction_macro.c 
b/debugger/system_routine/eviction_macro.c
new file mode 100644
index 0000000..1da2233
--- /dev/null
+++ b/debugger/system_routine/eviction_macro.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <[email protected]>
+ *
+ */
+
+#include <stdio.h>
+
+#define START 0x100
+#define END ((128 << 10) / 4)
+
+/*
+ * Print the EVICT_CACHE macro definition to stdout.
+ *
+ * For SANDYBRIDGE the macro expands to one "mov" plus one WRITE_SCRATCH4 per
+ * offset from START up to END - 8 in steps of 8; every line but the last ends
+ * in a line continuation. For other targets the macro is defined empty.
+ */
+int main(int argc, char *argv[]) {
+       int offset;
+
+       printf("#ifdef SANDYBRIDGE\n");
+       printf("#define EVICT_CACHE \\\n");
+       printf("\tmov (1) m0.5:ud g0.5:ud FLAGS; \\\n");
+
+       for (offset = START; offset < END; offset += 0x8) {
+               printf("\tmov (1) m0.2:ud 0x%04x:ud FLAGS; \\\n", offset);
+               if (offset + 0x8 < END)
+                       printf("\tWRITE_SCRATCH4(m0); \\\n");
+               else
+                       /* last entry closes the macro: no continuation */
+                       printf("\tWRITE_SCRATCH4(m0)\n");
+       }
+
+       printf("#else\n");
+       printf("#define EVICT_CACHE\n");
+       printf("#endif\n");
+}
diff --git a/debugger/system_routine/pre_cpp.py 
b/debugger/system_routine/pre_cpp.py
new file mode 100755
index 0000000..effea0e
--- /dev/null
+++ b/debugger/system_routine/pre_cpp.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Authors:
+#    Ben Widawsky <[email protected]>
+
+#very limited C-like preprocessor
+
+#limitations:
+# no macro substitutions
+# no multiline definitions
+# divide operator is //
+
+import sys,re
+
+file = open(sys.argv[1], "r")
+
+lines = file.readlines()
+len(lines)
+out = dict()
+defines = dict()
+
+count = 0
+#create a dict for our output
+for line in lines:
+    out[count] = line
+    count = count + 1
+
+#done is considered #define <name> <number>
+def is_done(string):
+    m = re.match("#define\s+(\w+?)\s+([a-fA-F0-9\-]+?)\s*$", string)
+    return m
+
+#skip macros, the real cpp will handle it
+def skip(string):
+    #macro
+    m = re.match("#define\s+\w+\(.+", string)
+    return m != None
+
+#put contants which are done being evaluated into the dictionary
+def easy_constants():
+    ret = 0
+    for lineno, string in out.items():
+        if skip(string):
+            continue
+        m = is_done(string)
+        if m != None:
+            key = m.group(1)
+            value = m.group(2)
+            if not key in defines:
+                    defines[key] = int(eval(value))
+                    ret = 1
+    return ret
+
+#replace names with dictionary values
+def simple_replace():
+    ret = 0
+    for lineno, string in out.items():
+        if skip(string):
+            continue
+        for key, value in defines.items():
+            if is_done(string):
+                continue
+            s = re.subn(key, repr(value), string)
+            if s[1] > 0:
+                out[lineno] = s[0]
+                ret = s[1]
+    return ret
+
+#evaluate expressions to try to simplify them
+def collapse_constants():
+    ret = 0
+    for lineno, string in out.items():
+        if skip(string):
+            continue
+        if is_done(string):
+            continue
+        m = re.match("#define\s+(.+?)\s+(.+)$", string)
+        if m != None:
+            try:
+                out[lineno] = "#define " + m.group(1) + " " + 
repr(eval(m.group(2)))
+                ret = 1
+            except NameError as ne:
+                #this happens before a variable is resolved in simple_replace
+                continue
+            except SyntaxError:
+                #this happens with something like #define foo bar, which the
+                #regular cpp can handle
+                continue
+            except:
+                raise KeyboardInterrupt
+    return ret;
+
+while True:
+    ret = 0
+    ret += easy_constants()
+    ret += simple_replace()
+    ret += collapse_constants()
+    if ret == 0:
+        break;
+
+for lineno, string in out.items():
+    print(string.rstrip())
diff --git a/debugger/system_routine/sr.g4a b/debugger/system_routine/sr.g4a
new file mode 100644
index 0000000..61c65d7
--- /dev/null
+++ b/debugger/system_routine/sr.g4a
@@ -0,0 +1,277 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <[email protected]>
+ *
+ */
+
+#include "debug.h"
+#include "evict.h"
+
+#define CR0_0_ME_STATE_CTRL (1 << 31)
+#define CR0_0_BP_SUPPRESS (1 << 15)
+#define CR0_0_SPF_EN (1 << 2)
+#define CR0_0_ACC_DIS (1 << 1)
+#define CR0_1_BES_CTRL (1 << 31)
+#define CR0_1_HALT_CTRL (1 << 30)
+#define CR0_1_SOFT_EXCEPTION_CTRL (1 << 29)
+#define CR0_1_ILLGL_OP_STS (1 << 28)
+#define CR0_1_STACK_OVRFLW_STS (1 << 27)
+
+#define CR0_0_ENTRY_UNMASK (CR0_0_SPF_EN | CR0_0_ACC_DIS)
+// TODO: Need to fix this for non breakpoint case (the mask only clears CR0_1_BES_CTRL)
+#define CR0_1_ENTRY_UNMASK ~(CR0_1_BES_CTRL)
+#define CR0_0_RETURN_MASK ~(CR0_0_ME_STATE_CTRL | CR0_0_SPF_EN | CR0_0_ACC_DIS)
+
+// TODO: not sure how to make this not hardcoded; PER_THREAD_QWORDS is this size divided by 16
+#define PER_THREAD_SCRATCH_SIZE (1 << 20)
+#define PER_THREAD_QWORDS (PER_THREAD_SCRATCH_SIZE >> 4)
+
+/*
+ * Should get this from brw_defines.h
+ *
+ * Block-size and message-type codes; they are shifted into place by the
+ * SEND_BLOCK_SIZE* and SEND_TYPE_* definitions further down.
+ */
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS              2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS              3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS              4
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE  8
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ     0
+
+/* desc field, ie. dword3 6.3.66.2 and 2.11.2.1.4 */
+#define SEND_MLEN_5            (5<<25)
+#define SEND_MLEN_3            (3<<25)
+#define SEND_MLEN_2            (2<<25)
+#define SEND_MLEN_1            (1<<25)
+#define SEND_RLEN_1            (1<<20)
+#define SEND_RLEN_0            (0<<20)
+#define SEND_HEADER_PRESENT    (1<<19)
+#define SEND_WRITE_COMMIT      (1<<17)
+#define SEND_TYPE_WRITE        (GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE<<13)
+#define SEND_TYPE_READ (BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ<<13)
+#define SEND_BLOCK_SIZE1       (BRW_DATAPORT_OWORD_BLOCK_2_OWORDS<<8)
+#define SEND_BLOCK_SIZE2       (BRW_DATAPORT_OWORD_BLOCK_4_OWORDS<<8)
+#define SEND_BLOCK_SIZE4       (BRW_DATAPORT_OWORD_BLOCK_8_OWORDS<<8)
+#define SEND_BINDING_TABLE     (255<<0)
+/*
+ * Complete descriptors. Each one must stay on a single line: there is no
+ * continuation, so a line break would cut the definition short.
+ * Every write descriptor pairs a block size of 2, 4 or 8 owords with a
+ * message length of 2, 3 or 5.
+ */
+// No write commit
+#define WRITE_DESC1_XXX SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_2
+#define WRITE_DESC1_WC SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_2 | SEND_WRITE_COMMIT
+#define WRITE_DESC2 SEND_BINDING_TABLE | SEND_BLOCK_SIZE2 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_3
+#define WRITE_DESC4 SEND_BINDING_TABLE | SEND_BLOCK_SIZE4 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_5
+#define RECV_DESC1 SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_READ | SEND_HEADER_PRESENT | SEND_MLEN_1 | SEND_RLEN_1
+// Pre-evaluated forms: 0x40902FF equals WRITE_DESC1_XXX, 0x40b02FF equals WRITE_DESC1_WC
+//#define SEND_DESC1 0x40902FF
+#define SEND_DESC1_WC 0x40b02FF
+
+/* ex_desc field 6.3.66.2 */
+#define SEND_DP_RENDER_CACHE   (5<<0)
+#define SEND_EOT               (1<<5)
+#define SEND_EX_DESC SEND_DP_RENDER_CACHE
+
+/**
+ * WRITE_SCRATCH1 - Write 2 owords.
+ * cdst.2 - offset
+ * cdst.5 - per thread scratch base, relative to gsba??
+ * cdst+1 - data to be written.
+ *
+ * cdst is the register used as message header. The other variants differ
+ * only in the descriptor and the destination of the send:
+ *   WRITE_SCRATCH1_WC - WRITE_DESC1_WC (write commit set), destination g1
+ *                       instead of null
+ *   WRITE_SCRATCH2    - WRITE_DESC2 (SEND_BLOCK_SIZE2, i.e. 4 owords)
+ *   WRITE_SCRATCH4    - WRITE_DESC4 (SEND_BLOCK_SIZE4, i.e. 8 owords)
+ */
+#define WRITE_SCRATCH1(cdst) \
+       send (16) null cdst SEND_EX_DESC WRITE_DESC1_XXX FLAGS
+#define WRITE_SCRATCH1_WC(cdst) \
+       send (16) g1 cdst SEND_EX_DESC WRITE_DESC1_WC FLAGS
+#define WRITE_SCRATCH2(cdst) \
+       send (16) null cdst SEND_EX_DESC WRITE_DESC2 FLAGS
+#define WRITE_SCRATCH4(cdst) \
+       send (16) null cdst SEND_EX_DESC WRITE_DESC4 FLAGS
+
+/**
+ * READ_SCRATCH1 - Read 2 owords.
+ * cdst.2 - offset
+ * cdst.5 - per thread scratch base, relative to gsba??
+ * grf - register where read data is populated.
+ *
+ * Uses RECV_DESC1: a one-register message (the header only) and a
+ * one-register response.
+ */
+#define READ_SCRATCH1(grf, cdst) \
+       send (16) grf:ud cdst SEND_EX_DESC RECV_DESC1 FLAGS
+
+/**
+ * SET_OFFSET - setup mrf for the given offset prior to a send instruction.
+ * mrf - message register to be used as the header.
+ * offset - offset.
+ *
+ * Copies g0.5 into mrf.5 and writes offset into mrf.2; no other part of mrf
+ * is touched.
+ *
+ * If a WRITE_SCRATCH follows, mrf+1 -> mrf+1+n should contain the data to be
+ * written.
+ */
+#define SET_OFFSET(mrf, offset) \
+       mov (1) mrf.5:ud g0.5:ud FLAGS; \
+       mov (1) mrf.2:ud offset:ud FLAGS
+
+/**
+ * SAVE_CRF - save the control register
+ * Writes one register at scratch offset CR_OFFSET: dwords 0-2 hold
+ * cr0.0-cr0.2, dword 3 holds sr0 and the remaining dwords hold 0xdeadbeef.
+ * clobbers: m0.2, m0.5, m1
+ */
+#define CR_OFFSET 0x40
+#define SAVE_CRF \
+       SET_OFFSET(m0, CR_OFFSET); \
+       mov (8) m1:ud 0xdeadbeef:ud FLAGS; \
+       mov (1) m1.0:ud cr0.0 FLAGS; \
+       mov (1) m1.1:ud cr0.1 FLAGS; \
+       mov (1) m1.2:ud cr0.2 FLAGS; \
+       mov (1) m1.3:ud sr0:ud FLAGS; \
+       WRITE_SCRATCH1(m0)
+
+/*
+ * STORE_GRF - copy grf into m1 and write it at the given scratch offset,
+ * with m0 as the message header.
+ * clobbers: m0.2, m0.5, m1
+ */
+#define STORE_GRF(grf, offset) \
+       SET_OFFSET(m0, offset); \
+       mov (8) m1:ud grf:ud FLAGS; \
+       WRITE_SCRATCH1(m0)
+
+/*
+ * LOAD_GRF - read the register stored at the given scratch offset into grf,
+ * with m0 as the message header.
+ * clobbers: m0.2, m0.5
+ */
+#define LOAD_GRF(grf, offset) \
+       SET_OFFSET(m0, offset); \
+       READ_SCRATCH1(grf, m0)
+
+/*
+ * SAVE_MRF - write at the given scratch offset with mrf itself as the message
+ * header. The data that lands in scratch is therefore mrf+1, not mrf (see
+ * WRITE_SCRATCH1).
+ * clobbers: mrf.2 mrf.5
+ */
+#define SAVE_MRF(mrf, offset) \
+       SET_OFFSET(mrf, offset); \
+       WRITE_SCRATCH1(mrf)
+
+/*
+ * RESTORE_MRF - load the register stored at the given scratch offset into mrf
+ * itself, going through g1.
+ * non-quirky semantics, unlike SAVE_MRF
+ * clobbers: g1 (and m0.2, m0.5 through LOAD_GRF)
+ */
+#define RESTORE_MRF(mrf, offset) \
+       LOAD_GRF(g1, offset); \
+       mov (8) mrf:ud g1:ud FLAGS
+
+#define SAVE_ALL_MRF \
+       /* m1 is saved already: SAVE_MRF(mN, ...) writes m(N+1), and Enter runs SAVE_MRF(m0, 0) before this */ \
+       SAVE_MRF(m1, 0x2); \
+       SAVE_MRF(m2, 0x4); \
+       SAVE_MRF(m3, 0x6); \
+       SAVE_MRF(m4, 0x8); \
+       SAVE_MRF(m5, 0xa); \
+       SAVE_MRF(m6, 0xc); \
+       SAVE_MRF(m7, 0xe); \
+       SAVE_MRF(m8, 0x10); \
+       SAVE_MRF(m9, 0x12); \
+       SAVE_MRF(m10, 0x14); \
+       SAVE_MRF(m11, 0x16); \
+       SAVE_MRF(m12, 0x18); \
+       SAVE_MRF(m13, 0x1a); \
+       SAVE_MRF(m14, 0x1c)
+
+#define RESTORE_ALL_MRF \
+       RESTORE_MRF(m15, 0x1c); \
+       RESTORE_MRF(m14, 0x1a); \
+       RESTORE_MRF(m13, 0x18); \
+       RESTORE_MRF(m12, 0x16); \
+       RESTORE_MRF(m11, 0x14); \
+       RESTORE_MRF(m10, 0x12); \
+       RESTORE_MRF(m9, 0x10); \
+       RESTORE_MRF(m8, 0xe); \
+       RESTORE_MRF(m7, 0xc); \
+       RESTORE_MRF(m6, 0xa); \
+       RESTORE_MRF(m5, 0x8); \
+       RESTORE_MRF(m4, 0x6); \
+       RESTORE_MRF(m3, 0x4); \
+       RESTORE_MRF(m2, 0x2); \
+       RESTORE_MRF(m1, 0x0)
+
+#ifndef SANDYBRIDGE
+       #error Only SandyBridge is supported
+#endif
+
+/* Default flags for an instruction; appended to every instruction and macro in this file */
+#define FLAGS { ALIGN1, SWITCH, MASK_DISABLE, ACCWRCTRL}
+
+/*
+ * We can clobber m0, and g0.4, everything else must be saved.
+ *
+ * Sequence: save m1-m15, g0 and g1 to scratch; write STATE_EU_MSG and
+ * DEBUG_PROTOCOL_VERSION to their slots; save cr0 and sr0; run EVICT_CACHE,
+ * wait on n1, run EVICT_CACHE again; reload the saved registers; finally
+ * adjust cr0.1 and cr0.0.
+ */
+Enter:
+       nop;
+
+       or (1) cr0.0 cr0.0 CR0_0_ENTRY_UNMASK:ud FLAGS;
+
+       /*
+        * g0.5 has the per thread scratch space when running in FS or VS.
+        * If we don't have a valid g0.5, we can calculate a per thread
+        * scratch offset using the system registers. The problem is we do
+        * not have a good way to know the offset from GSBA. The system
+        * routine will have to be hardcoded or dynamically patched with the
+        * correct offset.
+        * TID is in sr0.0[2:0]
+        * EUID is in sr0.0[11:8]
+        */
+
+       mov (1) g0.4:ud 0:ud                            FLAGS;
+
+#ifdef GPGPU
+#if 0
+       /*
+        * This should work according to the docs, the add blows up
+        * NOTE(review): "gr0.16" is spelled differently from every other
+        * register reference here (g0.x) - possibly a typo; confirm.
+        */
+       shr (1) g0.8:uw sr0.0:uw 5                      FLAGS;
+       add (1) g0.16:ub gr0.16:ub sr0.0:ub             FLAGS;
+#else
+       shr (1) g0.8:uw sr0.0:uw 5                      FLAGS;
+       mov (1) g0.9:uw sr0.0:uw                        FLAGS;
+       and (1) g0.9:uw g0.9:uw 0x7:uw                  FLAGS;
+       add (1) g0.8:uw g0.8:uw g0.9:uw                 FLAGS;
+       mov (1) g0.9:uw 0:uw                            FLAGS;
+       mul (1) g0.4:ud g0.4:ud PER_THREAD_QWORDS       FLAGS;
+#endif
+#endif
+
+
+       /*
+        * Saves must occur in order so as not to clobber the next register:
+        * each SAVE_MRF/STORE_GRF overwrites parts of the register it uses
+        * as header (and STORE_GRF overwrites m1) after the previous step
+        * has already written that register out.
+        */
+       SAVE_MRF(m0, 0);
+       STORE_GRF(g0, 0x20);
+       STORE_GRF(g1, 0x22);
+       SAVE_ALL_MRF;
+
+       mov (8) g1:ud STATE_EU_MSG:ud FLAGS;
+       STORE_GRF(g1, STATE_QWORD);
+
+       mov (8) g1:ud DEBUG_PROTOCOL_VERSION:ud FLAGS;
+       STORE_GRF(g1, COMMUNICATION_QWORD);
+
+       SAVE_CRF;
+
+       EVICT_CACHE;
+       wait n1:ud;
+       EVICT_CACHE;
+
+       /* Using this to try to keep coherency; the values read into g1 are discarded */
+       LOAD_GRF(g1, CR_OFFSET);
+       LOAD_GRF(g1, COMMUNICATION_QWORD);
+       LOAD_GRF(g1, STATE_QWORD);
+
+       RESTORE_ALL_MRF;
+       LOAD_GRF(g1, 0x22);
+       LOAD_GRF(g0, 0x20);
+
+       /* Clear breakpoint status (the CR0_1_BES_CTRL bit of cr0.1) */
+       and (1) cr0.1 cr0.1 CR0_1_ENTRY_UNMASK:ud FLAGS;
+
+       /* Set breakpoint suppress. TODO: this should be conditional on BES */
+       or  (1) cr0.0 cr0.0 CR0_0_BP_SUPPRESS:ud FLAGS;
+
+       and (1) cr0.0 cr0.0 CR0_0_RETURN_MASK:ud FLAGS;
+       nop;
diff --git a/lib/debug.h b/lib/debug.h
new file mode 100644
index 0000000..886f2a4
--- /dev/null
+++ b/lib/debug.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <[email protected]>
+ *
+ */
+
+#ifndef _DEBUG_H_
+#define _DEBUG_H_
+
+/*
+ * Locations inside the per-thread dump area. Each has two spellings:
+ * *_OFFSET is a byte offset (it matches the offsets of struct eu_state
+ * below) and *_QWORD is the same location divided by 16, which is the unit
+ * sr.g4a passes as its scratch offset.
+ */
+#define DEBUG_PROTOCOL_VERSION 1
+#define COMMUNICATION_OFFSET 0xc00
+#define COMMUNICATION_QWORD 0xc0
+
+#define STATE_EU_MSG 0x01f00d10
+#define STATE_CPU_ACK 0x02f00d20
+#define STATE_OFFSET 0xc20
+#define STATE_QWORD 0xc2
+
+#define TX_OFFSET 0xc40
+#define TX_QWORD 0xc4
+#define RX_OFFSET 0xc60
+#define RX_QWORD 0xc6
+
+/* Everything below is C only; the assembly build defines GEN_ASM. */
+#ifndef GEN_ASM
+/* Included here so that the header can be used on its own. */
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+typedef uint32_t grf[8];
+typedef uint32_t mrf[8];
+typedef uint8_t cr[12];
+typedef uint32_t sr;
+
+/* Initializer that repeats one value in all eight dwords of a register. */
+#define DWORD8(x) {x, x, x, x, x, x, x, x}
+
+static const grf protocol_version = DWORD8(DEBUG_PROTOCOL_VERSION);
+static const grf eu_msg = DWORD8(STATE_EU_MSG);
+static const grf cpu_ack = DWORD8(STATE_CPU_ACK);
+
+/*
+ * Per-thread dump area, 4096 bytes in total.
+ *
+ * NOTE(review): sr.g4a stores g0 at scratch offset 0x20 (byte 0x200), while
+ * g_regs starts at byte 0x1e0 here, so g_regs[1] appears to line up with g0
+ * rather than g_regs[0] - confirm against the code that indexes g_regs.
+ */
+struct eu_state {
+       mrf m_regs[15];         /* sr.g4a writes m1..m15 here, in that order */
+       grf g_regs[16];
+       grf pad;
+
+/* 0x400 */
+       cr cr0;                 /* cr0.0, cr0.1, cr0.2 */
+       sr sr0;
+       uint32_t beef_pad[4];   /* sr.g4a fills these with 0xdeadbeef */
+       uint8_t pad2[992 + 1024];
+
+/* 0xc00 COMMUNICATION_OFFSET */
+       grf version;            /* COMMUNICATION_OFFSET */
+       grf state_magic;        /* STATE_OFFSET */
+       grf eu_tx;              /* TX_OFFSET */
+       grf eu_rx;              /* RX_OFFSET */
+
+       uint8_t pad3[896];
+} __attribute__((packed));
+
+/*
+ * Print the eight dwords of a 32-byte register as hex, highest dword first,
+ * separated by single spaces and without a trailing newline.
+ */
+static inline void
+print_reg(uint8_t reg[32]) {
+       uint32_t dwords[8];
+
+       /* Copy instead of casting: reg need not point at uint32_t objects. */
+       memcpy(dwords, reg, sizeof(dwords));
+       printf("%08" PRIx32 " %08" PRIx32 " %08" PRIx32 " %08" PRIx32
+              " %08" PRIx32 " %08" PRIx32 " %08" PRIx32 " %08" PRIx32,
+               dwords[7], dwords[6], dwords[5], dwords[4],
+               dwords[3], dwords[2], dwords[1], dwords[0]);
+}
+
+/*
+ * Print the three dwords of a 12-byte control register as hex, highest
+ * dword first, without a trailing newline.
+ */
+static inline void
+print_creg(uint8_t reg[12]) {
+       uint32_t dwords[3];
+
+       /* Copy instead of casting: reg need not point at uint32_t objects. */
+       memcpy(dwords, reg, sizeof(dwords));
+       printf("%08" PRIx32 " %08" PRIx32 " %08" PRIx32,
+               dwords[2], dwords[1], dwords[0]);
+}
+#endif /* !GEN_ASM */
+
+#endif /* _DEBUG_H_ */
-- 
1.7.5.2

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to