Adds functions to create several kinds of batch buffer, and helpers for
working with them, including:
- A batch buffer which runs for a long duration to provide a delay on a
  specified ring.
- A function to calibrate the delay batch buffer to run for a specified
  period of time.
- A function to create a batch buffer which writes timestamps to a buffer
  object.
- A function to compare timestamps, allowing for wrapping of the values.

v2: Moved code to intel_batchbuffer (Daniel Vetter)
    Addressed review comments from Daniele Ceraolo Spurio

Signed-off-by: Derek Morton <derek.j.mor...@intel.com>
---
 lib/intel_batchbuffer.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++-
 lib/intel_batchbuffer.h |  14 ++
 2 files changed, 393 insertions(+), 5 deletions(-)

diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 692521f..30e78c5 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include <inttypes.h>
@@ -30,6 +30,8 @@
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>
+#include <stdint.h>
+#include <time.h>
 
 #include "drm.h"
 #include "drmtest.h"
@@ -42,6 +44,7 @@
 #include "ioctl_wrappers.h"
 #include "media_spin.h"
 #include "gpgpu_fill.h"
+#include "igt_gt.h"
 
 #include <i915_drm.h>
 
@@ -817,3 +820,374 @@ igt_media_spinfunc_t igt_get_media_spinfunc(int devid)
 
        return spin;
 }
+
+#define SEC_TO_NSEC (1000 * 1000 * 1000)
+#define DWORDS_TO_BYTES(x) ((x)*4)
+/* MI command headers; the argument is stored biased (-2 / -1) in the low bits */
+#define MI_STORE_REGISTER_MEM(LENGTH)   ((0x024 << 23) | (((LENGTH) - 2) & 0xff))
+#define MI_MATH(NrInst)                 ((0x01A << 23) | (((NrInst) - 1) & 0x3f))
+#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2)
+#define MI_COPY_MEM_MEM                 ((0x02E << 23) | (3))
+/* ALU instruction dwords, emitted directly after an MI_MATH header */
+#define ALU_LOAD(TO, FROM)  ((0x080 << 20) | ((TO) << 10) | (FROM))
+#define ALU_SUB             ( 0x101 << 20)
+#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM))
+/* Register offsets, added to the per-ring base from get_mmio_base() */
+#define TIMESTAMP_offset      (0x358) /* Elapsed time from system start */
+#define CTX_TIMESTAMP_offset  (0x3A8) /* Elapsed Time from context creation */
+#define ALU_GPU_R0_LSB_offset (0x600)
+#define ALU_GPU_R0_MSB_offset (0x604)
+#define ALU_GPU_R1_LSB_offset (0x608)
+#define ALU_GPU_R1_MSB_offset (0x60C)
+#define ALU_GPU_R2_LSB_offset (0x610)
+#define ALU_GPU_R2_MSB_offset (0x614)
+/* Operand encodings for the TO / FROM fields of ALU_LOAD() and ALU_STORE() */
+#define ALU_R0_ENCODING   (0x00)
+#define ALU_R1_ENCODING   (0x01)
+#define ALU_SRCA_ENCODING (0x20)
+#define ALU_SRCB_ENCODING (0x21)
+#define ALU_ACCU_ENCODING (0x31)
+
+/* Size, in dwords, of an address in a batch command: 2 for gen >= 8, else 1 */
+static int bb_address_size_dw(int fd)
+{
+       const bool two_dwords = intel_gen(intel_get_drm_devid(fd)) >= 8;
+
+       return two_dwords ? 2 : 1;
+}
+
+static uint32_t get_mmio_base(int ringid)
+{
+       switch (ringid) { /* maps a ring selector to its register base */
+       case I915_EXEC_RENDER:
+               return 0x02000;
+       case I915_EXEC_BSD:
+       case I915_EXEC_BSD | 1<<13: /* BSD1 */
+               return 0x12000;
+       case I915_EXEC_BSD | 2<<13: /* BSD2 */
+               return 0x1c000;
+       case I915_EXEC_BLT:
+               return 0x22000;
+       case I915_EXEC_VEBOX:
+               return 0x1A000;
+       default: /* unrecognised selector: assert with the offending value */
+               igt_assert_f(0, "Invalid ringid %d passed to get_mmio_base()\n", ringid);
+       }
+}
+
+/**
+ * igt_batch_used:
+ * @batch: batchbuffer to get offset from
+ *
+ * Reports how far into the batchbuffer commands have been written, i.e. the
+ * byte offset at which the next OUT_BATCH would write.
+ *
+ * Returns:
+ * The number of bytes of the batchbuffer that have been used.
+ */
+uint32_t igt_batch_used(struct intel_batchbuffer *batch)
+{
+       return (uint32_t)(batch->ptr - batch->buffer);
+}
+
+/**
+ * igt_create_delay_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @batch: Batch buffer to write to
+ * @ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
+ * @loops: Number of times to loop
+ * @dest: Buffer to use for saving the current loop count and timestamp.
+ *
+ * This creates a batch buffer which will iterate a loop a specified number
+ * of times. Intended for creating batch buffers which take an arbitrarily
+ * long time to execute. This can be useful to keep a ring busy while
+ * constructing a test scenario.
+ *
+ * The dest buffer will have a number of Dwords written by the batch buffer
+ * when it runs. They are:
+ * DW0 & DW1 - These are loaded with the value of 'loops' and are decremented
+ *             as the batch buffer executes. They will be 0 after the batch
+ *             buffer completes if it finished successfully.
+ * DW2 Timestamp - An indication of when the batch buffer ran allowing a
+ *                 comparison between batch buffers to show execution order.
+ *                 May wrap so igt_compare_timestamps() should be used to
+ *                 compare timestamps.
+ *                 The timestamp will wrap every few minutes.
+ *
+ */
+void igt_create_delay_bb(int fd, struct intel_batchbuffer *batch,
+                         int ringid, uint32_t loops, drm_intel_bo *dest)
+{
+       int addr_size_dw;
+       uint32_t mmio_base, jump_offset;
+
+       /* CMD parser blocks reading TIMESTAMP register on gen 7.5 */
+       igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+
+       addr_size_dw = bb_address_size_dw(fd);
+       mmio_base = get_mmio_base(ringid);
+       igt_assert(batch);
+       BEGIN_BATCH(32, 5);
+
+       /* store current timestamp in DW2 */
+       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+       OUT_BATCH(mmio_base + TIMESTAMP_offset);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
+
+       /* Load R0 with loops */
+       OUT_BATCH(MI_LOAD_REGISTER_IMM);
+       OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
+       OUT_BATCH(loops);
+       OUT_BATCH(MI_LOAD_REGISTER_IMM);
+       OUT_BATCH(mmio_base + ALU_GPU_R0_MSB_offset);
+       OUT_BATCH(0x00000000);
+       /* Load R1 with 1 */
+       OUT_BATCH(MI_LOAD_REGISTER_IMM);
+       OUT_BATCH(mmio_base + ALU_GPU_R1_LSB_offset);
+       OUT_BATCH(0x00000001);
+       OUT_BATCH(MI_LOAD_REGISTER_IMM);
+       OUT_BATCH(mmio_base + ALU_GPU_R1_MSB_offset);
+       OUT_BATCH(0x00000000);
+       /* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */
+       /* i.e. R0 -= 1. jump_offset marks the start of the loop body */
+       jump_offset = igt_batch_used(batch);
+       OUT_BATCH(MI_MATH(4));
+       OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING));
+       OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING));
+       OUT_BATCH(ALU_SUB);
+       OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING));
+       /* Copy R0 to dest
+        * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address
+        * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed
+        * together.
+        * On Gen9+, and the other rings on Gen8 Compare address points to
+        * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1.
+        */
+       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+       OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+       OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
+       /* Repeat until R0 == 0 */
+       OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END);
+       OUT_BATCH(0x00000000);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+       OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1));
+       OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, jump_offset);
+
+       /* Should never get here, but end if it happens */
+       OUT_BATCH(MI_BATCH_BUFFER_END);
+       ADVANCE_BATCH();
+}
+
+/**
+ * igt_create_timestamp_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @batch: Batch buffer to write to
+ * @ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
+ * @dest: Buffer to use for saving the timestamps.
+ * @load: Buffer to access. Set NULL if not required.
+ * @write: If true and load is not NULL, will also write a timestamp to load
+ * buffer. If false and load is not NULL, will read from load buffer into dest.
+ * Intended for dependency checking.
+ *
+ * This creates a batch buffer which writes timestamps into a buffer object.
+ * If 'load' is non-NULL, a timestamp is either written to 'load' or data is
+ * copied from 'load' into 'dest', depending on whether 'write' is set.
+ *
+ * The dest buffer will have a number of Dwords written by the batch buffer
+ * when it runs. They are:
+ * DW0 Reported timestamp - An indication of when the batch buffer ran,
+ *                          allowing a comparison between batch buffers to
+ *                          show execution order. May wrap (every few
+ *                          minutes) so igt_compare_timestamps() should be
+ *                          used to compare timestamps.
+ * DW2 Value copied from DW0 of load if write == false
+ */
+void igt_create_timestamp_bb(int fd, struct intel_batchbuffer *batch,
+                             int ringid, drm_intel_bo *dest,
+                             drm_intel_bo *load, bool write)
+{
+       int addr_size_dw;
+       uint32_t mmio_base;
+
+       /* CMD parser blocks reading TIMESTAMP register on gen 7.5 */
+       igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+
+       addr_size_dw = bb_address_size_dw(fd);
+       mmio_base = get_mmio_base(ringid);
+       igt_assert(batch);
+
+       BEGIN_BATCH(3, 1);
+       /* store current reported timestamp in DW0 */
+       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+       OUT_BATCH(mmio_base + TIMESTAMP_offset);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
+       ADVANCE_BATCH();
+
+       if(load != NULL) {
+               if(write) {
+                       /* also store the timestamp in DW0 of load */
+                       BEGIN_BATCH(3, 1);
+                       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+                       OUT_BATCH(mmio_base + TIMESTAMP_offset);
+                       OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
+                       ADVANCE_BATCH();
+               } else {
+                       /* copy DW0 of load into DW2 of dest */
+                       BEGIN_BATCH(3, 2);
+                       OUT_BATCH(MI_COPY_MEM_MEM);
+                       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
+                       OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(0));
+                       ADVANCE_BATCH();
+               }
+       }
+
+       BEGIN_BATCH(1, 0);
+       OUT_BATCH(MI_BATCH_BUFFER_END);
+       ADVANCE_BATCH();
+}
+
+/**
+ * igt_create_noop_bb:
+ * @batch: Batch buffer to write to
+ * @noops: Number of MI_NOOP instructions to add to the batch buffer.
+ *
+ * Emits @noops MI_NOOP instructions into @batch, followed by a single
+ * MI_BATCH_BUFFER_END.
+ */
+void igt_create_noop_bb(struct intel_batchbuffer *batch, int noops)
+{
+       int remaining;
+
+       igt_assert(batch);
+
+       BEGIN_BATCH(noops + 1, 0);
+       for (remaining = noops; remaining > 0; remaining--)
+               OUT_BATCH(MI_NOOP);
+       OUT_BATCH(MI_BATCH_BUFFER_END);
+       ADVANCE_BATCH();
+}
+
+/* Store calibrated values so they only need calculating once.
+ * Use intel_execution_engines array as list of supported rings
+ */
+static uint32_t *calibrated_ring_value = NULL;
+
+/* 0x100000 will run for about 0.6 - 0.8 seconds (dependent on ring) on BXT HW */
+#define CAL_SEED (0x100000)
+
+/**
+ * igt_calibrate_delay_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @bufmgr: Buffer manager to be used for creation of batch buffers
+ * @ringid: Ring to calibrate. e.g. I915_EXEC_RENDER
+ *
+ * This calculates the value of loops that would need to be passed to
+ * igt_create_delay_bb() to create a delay of about 1 second on the specified
+ * ring, by timing a delay batch buffer of CAL_SEED loops. The result is
+ * cached for rings listed in intel_execution_engines, so those are only
+ * measured once.
+ *
+ * Returns:
+ * uint32_t to be passed to igt_create_delay_bb().
+ */
+uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid)
+{
+       uint32_t buf[2];
+       struct intel_batchbuffer *bb;
+       struct timespec start, end;
+       uint64_t duration;
+       uint64_t calibrated;
+       drm_intel_bo *target_bo;
+       int ring_index=0;
+
+       if(calibrated_ring_value == NULL) {
+               int count;
+               for(count = 0; intel_execution_engines[count].name != NULL; count++) {}
+               calibrated_ring_value = calloc(count, sizeof(uint32_t));
+               igt_assert(calibrated_ring_value);
+       }
+
+       /* Check if there is already a calibration value for this ring */
+       while(intel_execution_engines[ring_index].name != NULL) {
+               if((intel_execution_engines[ring_index].exec_id |
+                   intel_execution_engines[ring_index].flags) == ringid) {
+                       if(calibrated_ring_value[ring_index] != 0) {
+                               return calibrated_ring_value[ring_index];
+                       }
+                       break;
+               }
+               ring_index++;
+       }
+
+       target_bo = drm_intel_bo_alloc(bufmgr, "target bo", BATCH_SZ, BATCH_SZ);
+       igt_assert(target_bo);
+
+       /* Put some non zero values in the target bo */
+       {
+               uint32_t data=0xffffffff;
+               drm_intel_bo_subdata(target_bo, 0, 4, &data);
+       }
+
+       bb = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+       igt_assert(bb);
+       igt_create_delay_bb(fd, bb, ringid, CAL_SEED, target_bo);
+
+       gem_quiescent_gpu(fd);
+       clock_gettime(CLOCK_MONOTONIC, &start);
+       intel_batchbuffer_flush_on_ring(bb, ringid);
+       /* This will not return until the bo has finished executing */
+       drm_intel_bo_wait_rendering(target_bo);
+       clock_gettime(CLOCK_MONOTONIC, &end);
+
+       drm_intel_bo_get_subdata(target_bo, 0, 4, (void*)buf);
+
+       /* buf[0] in the target buffer should be 0 if the batch buffer completed */
+       igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", buf[0]);
+
+       duration = ((((uint64_t)end.tv_sec - (uint64_t)start.tv_sec) * SEC_TO_NSEC)
+                  + (uint64_t)end.tv_nsec) - (uint64_t)start.tv_nsec;
+
+       igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n",
+                 duration / SEC_TO_NSEC,
+                 (duration % SEC_TO_NSEC) / 100000);
+       drm_intel_bo_unreference(target_bo);
+       intel_batchbuffer_free(bb);
+
+       /* Sanity check. If duration < 100ms, something has clearly gone wrong.
+        * Must precede the division below so it can never divide by zero. */
+       igt_assert(duration > (SEC_TO_NSEC  / 10));
+
+       calibrated = (((uint64_t)(CAL_SEED) * SEC_TO_NSEC) / duration);
+       igt_assert_f(calibrated <= UINT32_MAX, "Calibrated value > UINT32_MAX\n");
+
+       if(intel_execution_engines[ring_index].name != NULL)
+               calibrated_ring_value[ring_index] = (uint32_t)calibrated;
+       return (uint32_t)calibrated;
+}
+
+/**
+ * igt_compare_timestamps:
+ * @ts1: timestamp 1
+ * @ts2: timestamp 2
+ *
+ * This compares two uint32_t timestamps. To handle wrapping it assumes the
+ * difference between the two timestamps is less than 1/4 the max elapsed time
+ * represented by the counters.
+ * It also assumes the timestamps are samples from the same counter.
+ * The comparison is antisymmetric: for distinct in-range timestamps exactly
+ * one of (ts1, ts2) and (ts2, ts1) compares true, whichever side wrapped.
+ *
+ * Returns:
+ * True if ts2 > ts1, allowing for counter wrapping, false otherwise.
+ */
+bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2)
+{
+       /* Unsigned subtraction is modulo 2^32, so a counter wrap cancels out */
+       uint32_t delta = ts2 - ts1;
+
+       /* ts2 is later only if it is ahead by less than half the range */
+       return delta != 0 && delta < 0x80000000;
+}
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 869747d..5b66fa3 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -323,4 +323,18 @@ typedef void (*igt_media_spinfunc_t)(struct intel_batchbuffer *batch,
 
 igt_media_spinfunc_t igt_get_media_spinfunc(int devid);
 
+uint32_t igt_batch_used(struct intel_batchbuffer *batch);
+
+void igt_create_delay_bb(int fd, struct intel_batchbuffer *batch,
+               int ringid, uint32_t loops, drm_intel_bo *dest);
+
+void igt_create_timestamp_bb(int fd, struct intel_batchbuffer *batch, int ringid,
+                       drm_intel_bo *dest, drm_intel_bo *load, bool write);
+
+void igt_create_noop_bb(struct intel_batchbuffer *batch, int noops);
+
+uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid);
+
+bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2);
+
 #endif
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to