From: Derek Morton <derek.j.mor...@intel.com>

---
 lib/Makefile.sources   |   2 +
 lib/igt.h              |   1 +
 lib/igt_bb_factory.c   | 391 +++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_bb_factory.h   |  47 ++++++
 tests/Makefile.sources |   1 +
 tests/gem_scheduler.c  | 421 +++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 863 insertions(+)
 create mode 100644 lib/igt_bb_factory.c
 create mode 100644 lib/igt_bb_factory.h
 create mode 100644 tests/gem_scheduler.c

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 1316fd2..c450db2 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -7,6 +7,8 @@ libintel_tools_la_SOURCES =     \
        i915_reg.h              \
        i915_pciids.h           \
        igt.h                   \
+       igt_bb_factory.c        \
+       igt_bb_factory.h        \
        igt_debugfs.c           \
        igt_debugfs.h           \
        igt_aux.c               \
diff --git a/lib/igt.h b/lib/igt.h
index d751f24..be87915 100644
--- a/lib/igt.h
+++ b/lib/igt.h
@@ -37,6 +37,7 @@
 #include "igt_kms.h"
 #include "igt_pm.h"
 #include "igt_stats.h"
+#include "igt_bb_factory.h"
 #include "instdone.h"
 #include "intel_batchbuffer.h"
 #include "intel_chipset.h"
diff --git a/lib/igt_bb_factory.c b/lib/igt_bb_factory.c
new file mode 100644
index 0000000..064378a
--- /dev/null
+++ b/lib/igt_bb_factory.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Derek Morton <derek.j.mor...@intel.com>
+ *
+ */
+
+#include "igt.h"
+#include "intel_batchbuffer.h"
+#include <stdint.h>
+#include <inttypes.h>
+#include <time.h>
+
+/* Unit conversion helpers */
+#define SEC_TO_NSEC (1000 * 1000 * 1000)
+#define DWORDS_TO_BYTES(x) ((x)*4)
+
+/*
+ * MI command encodings used by the batch buffers built in this file.
+ * Macro arguments are parenthesised so any expression may be passed in.
+ */
+#define MI_STORE_REGISTER_MEM(LENGTH)   ((0x024 << 23) | (((LENGTH) - 2) & 0xff))
+#define MI_MATH(NrInst)                 ((0x01A << 23) | (((NrInst) - 1) & 0x3f))
+#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2)
+#define MI_COPY_MEM_MEM                 ((0x02E << 23) | (3))
+
+/* ALU instruction dwords emitted after MI_MATH */
+#define ALU_LOAD(TO, FROM)  ((0x080 << 20) | ((TO) << 10) | (FROM))
+#define ALU_SUB             ( 0x101 << 20)
+#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM))
+
+/* Register offsets; added to the value returned by get_register_offset() */
+#define TIMESTAMP_offset      (0x358) /* Elapsed time from system start */
+#define CTX_TIMESTAMP_offset  (0x3A8) /* Elapsed Time from context creation */
+#define ALU_GPU_R0_LSB_offset (0x600)
+#define ALU_GPU_R0_MSB_offset (0x604)
+#define ALU_GPU_R1_LSB_offset (0x608)
+#define ALU_GPU_R1_MSB_offset (0x60C)
+#define ALU_GPU_R2_LSB_offset (0x610)
+#define ALU_GPU_R2_MSB_offset (0x614)
+
+/* Operand encodings passed to ALU_LOAD() / ALU_STORE() */
+#define ALU_R0_ENCODING   (0x00)
+#define ALU_R1_ENCODING   (0x01)
+#define ALU_SRCA_ENCODING (0x20)
+#define ALU_SRCB_ENCODING (0x21)
+#define ALU_ACCU_ENCODING (0x31)
+
+/**
+ * SECTION:igt_bb_factory
+ * @short_description: Utility functions for creating batch buffers
+ * @title: Batch Buffer Factory
+ * @include: igt.h
+ *
+ * This library implements functions for creating batch buffers which may be
+ * useful to multiple tests.
+ */
+
+/*
+ * Require (via igt_require) that the device behind fd is gen8 or newer.
+ * The check is only performed until it has passed once; afterwards the
+ * function returns immediately, whatever fd is passed.
+ */
+static void check_gen_8(int fd)
+{
+       static bool already_verified = false;
+
+       if (already_verified)
+               return;
+
+       igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+       already_verified = true;
+}
+
+/*
+ * Returns 2 when the device behind fd is gen8 or newer and 1 otherwise.
+ * Callers use the result as the number of dwords an address occupies in a
+ * batch buffer command.
+ */
+static int bb_address_size_dw(int fd)
+{
+       return (intel_gen(intel_get_drm_devid(fd)) >= 8) ? 2 : 1;
+}
+
+/*
+ * Translate an I915_EXEC_* ring id into the base offset that callers add to
+ * the per-ring register offsets (TIMESTAMP_offset, ALU_GPU_R0_LSB_offset...).
+ * Any ring id not listed below fails the test through igt_assert_f().
+ */
+static uint32_t get_register_offset(int ringid)
+{
+       switch (ringid) {
+       case I915_EXEC_RENDER:
+               return 0x02000;
+       case I915_EXEC_BSD:
+               return 0x12000;
+       case I915_EXEC_BLT:
+               return 0x22000;
+       case I915_EXEC_VEBOX:
+               return 0x1A000;
+       default:
+               igt_assert_f(0, "Invalid ringid %d passed to "
+                            "get_register_offset()\n", ringid);
+       }
+
+       /* Only reached if the assertion above returns; keeps every path
+        * returning a defined value. */
+       return 0;
+}
+
+/**
+ * igt_create_delay_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @bufmgr: Buffer manager to be used for creation of batch buffers
+ * @ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
+ * @loops: Number of times to loop
+ * @dest: Buffer to use for saving the current loop count and timestamp.
+ *
+ * This creates a batch buffer which will iterate a loop a specified number
+ * of times. Intended for creating batch buffers which take an arbitrarily
+ * long time to execute. This can be useful to keep a ring busy while other
+ * batch buffers are queued when testing batch execution order.
+ *
+ * The dest buffer will have a number of Dwords written by the batch buffer
+ * when it runs. They are:
+ * DW0 & DW1 - Counter LSB. Will be 0 if the batch buffer finished successfully.
+ * DW2 Timestamp - Elapsed time since system start when batch buffer ran.
+ *
+ * Returns:
+ * The struct intel_batchbuffer created.
+ */
+struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
+               int ringid, uint32_t loops, drm_intel_bo *dest)
+{
+       struct intel_batchbuffer *batch;
+       int addr_size_dw;
+       int loop_start_dw;
+       uint32_t regOffset;
+
+       check_gen_8(fd);
+
+       addr_size_dw = bb_address_size_dw(fd);
+       regOffset = get_register_offset(ringid);
+       batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+       igt_assert(batch);
+
+       /*
+        * The loop body starts at MI_MATH. It is preceded by one
+        * MI_STORE_REGISTER_MEM (2 dwords plus the address) and four
+        * MI_LOAD_REGISTER_IMM commands (3 dwords each), so derive the jump
+        * target from the address size instead of hard coding it.
+        */
+       loop_start_dw = (2 + addr_size_dw) + (4 * 3);
+
+       BEGIN_BATCH(32, 5);
+       /* store current timestamp in DW2 */
+       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+       OUT_BATCH(regOffset + TIMESTAMP_offset);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
+                 I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
+
+       /* Load R0 with loops */
+       OUT_BATCH(MI_LOAD_REGISTER_IMM);
+       OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
+       OUT_BATCH(loops);
+       OUT_BATCH(MI_LOAD_REGISTER_IMM);
+       OUT_BATCH(regOffset + ALU_GPU_R0_MSB_offset);
+       OUT_BATCH(0x00000000);
+       /* Load R1 with 1 */
+       OUT_BATCH(MI_LOAD_REGISTER_IMM);
+       OUT_BATCH(regOffset + ALU_GPU_R1_LSB_offset);
+       OUT_BATCH(0x00000001);
+       OUT_BATCH(MI_LOAD_REGISTER_IMM);
+       OUT_BATCH(regOffset + ALU_GPU_R1_MSB_offset);
+       OUT_BATCH(0x00000000);
+       /* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */
+       /* e.g. R0 -= 1 */
+       OUT_BATCH(MI_MATH(4));
+       OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING));
+       OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING));
+       OUT_BATCH(ALU_SUB);
+       OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING));
+       /* Copy R0 to dest
+        * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address
+        * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed
+        * together.
+        * On Gen9+, and the other rings on Gen8 Compare address points to
+        * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1.
+        */
+       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+       OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
+                 I915_GEM_DOMAIN_INSTRUCTION, 0);
+       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+       OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
+                 I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
+       /* Repeat until R0 == 0 */
+       OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END);
+       OUT_BATCH(0x00000000);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
+                 I915_GEM_DOMAIN_INSTRUCTION, 0);
+       /* Jump back to the MI_MATH at the top of the loop */
+       OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1));
+       OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+                 DWORDS_TO_BYTES(loop_start_dw));
+       /* Should never get here, but end if it happens */
+
+       OUT_BATCH(MI_BATCH_BUFFER_END);
+       ADVANCE_BATCH();
+
+       return batch;
+}
+
+/**
+ * igt_create_timestamp_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @bufmgr: Buffer manager to be used for creation of batch buffers
+ * @ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
+ * @dest: Buffer to use for saving the timestamps.
+ * @load: Buffer to access. Set NULL if not required.
+ * @write: If true and load is not NULL, will also write a timestamp to load
+ * buffer. If false and load is not NULL, will read from load buffer into dest.
+ * Intended for dependency checking.
+ *
+ * This creates a batch buffer which writes timestamps into a buffer object.
+ *
+ * The dest buffer will have a number of Dwords written by the batch buffer
+ * when it runs. They are:
+ * DW0 Reported timestamp - Elapsed time since system start.
+ * DW1 Context timestamp - Elapsed time since context was created.
+ * DW2 Value copied from DW0 of load if write == false
+ *
+ * Returns:
+ * The struct intel_batchbuffer created.
+ */
+struct intel_batchbuffer *igt_create_timestamp_bb(int fd,
+               drm_intel_bufmgr *bufmgr, int ringid, drm_intel_bo *dest,
+               drm_intel_bo *load, bool write)
+{
+       struct intel_batchbuffer *batch;
+       int addr_size_dw;
+       uint32_t regOffset;
+
+       check_gen_8(fd);
+
+       addr_size_dw = bb_address_size_dw(fd);
+       regOffset = get_register_offset(ringid);
+       batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+       igt_assert(batch);
+
+       BEGIN_BATCH(6, 2);
+       /* store current reported timestamp in DW0 */
+       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+       OUT_BATCH(regOffset + TIMESTAMP_offset);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
+                 I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
+
+       /* store current context timestamp in DW1 */
+       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+       OUT_BATCH(regOffset + CTX_TIMESTAMP_offset);
+       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
+                 I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
+
+       ADVANCE_BATCH();
+
+       if (load != NULL) {
+               if (write) {
+                       /* write dependency: store a timestamp in DW0 of load */
+                       BEGIN_BATCH(3, 1);
+                       OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+                       OUT_BATCH(regOffset + TIMESTAMP_offset);
+                       OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION,
+                                 I915_GEM_DOMAIN_INSTRUCTION,
+                                 DWORDS_TO_BYTES(0));
+                       ADVANCE_BATCH();
+               } else {
+                       /* read dependency: copy DW0 of load to DW2 of dest */
+                       BEGIN_BATCH(3, 2);
+                       OUT_BATCH(MI_COPY_MEM_MEM);
+                       OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
+                                 I915_GEM_DOMAIN_INSTRUCTION,
+                                 DWORDS_TO_BYTES(2));
+                       OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0,
+                                 DWORDS_TO_BYTES(0));
+                       ADVANCE_BATCH();
+               }
+       }
+
+       BEGIN_BATCH(1, 0);
+       OUT_BATCH(MI_BATCH_BUFFER_END);
+       ADVANCE_BATCH();
+
+       return batch;
+}
+
+/**
+ * igt_create_noop_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @bufmgr: Buffer manager to be used for creation of batch buffers
+ * @ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
+ * (not currently used by this function)
+ * @noops: Number of MI_NOOP instructions to add to the batch buffer.
+ *
+ * Builds a batch buffer holding @noops MI_NOOP instructions followed by a
+ * single MI_BATCH_BUFFER_END.
+ *
+ * Returns:
+ * The struct intel_batchbuffer created.
+ */
+struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
+               int ringid, int noops)
+{
+       struct intel_batchbuffer *batch =
+               intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+       int remaining;
+
+       igt_assert(batch);
+
+       BEGIN_BATCH(noops + 1, 0);
+       for (remaining = noops; remaining > 0; remaining--)
+               OUT_BATCH(MI_NOOP);
+       OUT_BATCH(MI_BATCH_BUFFER_END);
+       ADVANCE_BATCH();
+
+       return batch;
+}
+
+/*
+ * Cache of calibrated loop counts, indexed by ring id, so each ring only
+ * needs calibrating once. I915_EXEC_RING_MASK allows 3 bits for ring ids,
+ * hence room for 8 values. Static storage starts out as all zeros, and
+ * igt_calibrate_delay_bb() treats 0 as "not calibrated yet".
+ */
+static uint32_t calibrated_ring_value[8];
+
+/**
+ * igt_calibrate_delay_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @bufmgr: Buffer manager to be used for creation of batch buffers
+ * @ringid: Ring to calibrate. e.g. I915_EXEC_RENDER
+ *
+ * This calculates the value of loops that would need to be passed to
+ * igt_create_delay_bb() to create a delay of about 1 second on the specified
+ * ring. The result is cached per ring, so the measurement is only made the
+ * first time a ring is calibrated.
+ *
+ * Returns:
+ * uint32_t to be passed to igt_create_delay_bb().
+ */
+uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid)
+{
+       /* Loop count used for the timed, uncalibrated run */
+       const uint32_t uncalibrated_loops = 0x100000;
+       uint32_t *buf;
+       struct intel_batchbuffer *bb;
+       struct timespec start, end;
+       uint64_t duration;
+       uint64_t calibrated;
+       drm_intel_bo *target_bo;
+
+       /* ringid indexes calibrated_ring_value[], so bound it on both sides */
+       igt_assert(ringid >= 0 && ringid < 8);
+       if (calibrated_ring_value[ringid] != 0)
+               return calibrated_ring_value[ringid];
+
+       target_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+       igt_assert(target_bo);
+
+       /* Put some non zero values in the target bo */
+       drm_intel_bo_map(target_bo, 1);
+       buf = target_bo->virtual;
+       buf[0] = 0xff;
+       drm_intel_bo_unmap(target_bo);
+
+       bb = igt_create_delay_bb(fd, bufmgr, ringid, uncalibrated_loops,
+                                target_bo);
+
+       gem_quiescent_gpu(fd);
+       clock_gettime(CLOCK_MONOTONIC, &start);
+       intel_batchbuffer_flush_on_ring(bb, ringid);
+       /* This will not return until the bo has finished executing */
+       drm_intel_bo_map(target_bo, 0);
+       clock_gettime(CLOCK_MONOTONIC, &end);
+
+       buf = target_bo->virtual;
+       /* buf[0] in the target buffer should be 0 if the batch buffer
+        * completed */
+       igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", buf[0]);
+
+       /* Widen to 64 bits before multiplying so the seconds to nanoseconds
+        * conversion cannot overflow a narrower time_t / int */
+       duration = (uint64_t)(end.tv_sec - start.tv_sec) * SEC_TO_NSEC
+                  + end.tv_nsec - start.tv_nsec;
+       igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n",
+                 duration / SEC_TO_NSEC,
+                 (duration % SEC_TO_NSEC) / 100000);
+
+       /* Release the read mapping taken above before dropping the bo */
+       drm_intel_bo_unmap(target_bo);
+       drm_intel_bo_unreference(target_bo);
+       intel_batchbuffer_free(bb);
+
+       /* Sanity check. If duration < 10ms, something has clearly gone wrong.
+        * Checked before the division below so duration can never be 0. */
+       igt_assert(duration > (SEC_TO_NSEC  / 100));
+
+       calibrated = ((uint64_t)uncalibrated_loops * SEC_TO_NSEC) / duration;
+
+       if (calibrated > 0xffffffff) {
+               igt_warn("Truncating to max uint32\n");
+               return 0xffffffff;
+       }
+
+       calibrated_ring_value[ringid] = (uint32_t)calibrated;
+       return (uint32_t)calibrated;
+}
+
+/**
+ * igt_compare_timestamps:
+ * @ts1: timestamp 1
+ * @ts2: timestamp 2
+ *
+ * Decides whether @ts2 was sampled after @ts1. Both are uint32_t samples that
+ * are assumed to come from the same counter. To cope with the counter
+ * wrapping, the difference between the two is assumed to be less than 1/4 of
+ * the maximum elapsed time the counter can represent.
+ *
+ * Returns:
+ * True if ts2 > ts1, allowing for counter wrapping, false otherwise.
+ */
+
+bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2)
+{
+       /* ts1 in the upper half and ts2 in the lowest quarter of the range
+        * is treated as the counter having wrapped between the samples */
+       const bool wrapped = (ts1 > 0x80000000) && (ts2 < 0x40000000);
+
+       return (ts2 > ts1) || wrapped;
+}
diff --git a/lib/igt_bb_factory.h b/lib/igt_bb_factory.h
new file mode 100644
index 0000000..3ab7f13
--- /dev/null
+++ b/lib/igt_bb_factory.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Derek Morton <derek.j.mor...@intel.com>
+ *
+ */
+
+#ifndef IGT_BB_FACTORY_H
+#define IGT_BB_FACTORY_H
+
+#include "intel_batchbuffer.h"
+#include <stdint.h>
+
+/* Batch buffer that loops 'loops' times, recording progress in dest */
+struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
+               int ringid, uint32_t loops, drm_intel_bo *dest);
+
+/* Batch buffer that stores timestamps in dest, optionally accessing load */
+struct intel_batchbuffer *igt_create_timestamp_bb(int fd,
+               drm_intel_bufmgr *bufmgr, int ringid, drm_intel_bo *dest,
+               drm_intel_bo *load, bool write);
+
+/* Batch buffer made of 'noops' MI_NOOP instructions */
+struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
+               int ringid, int noops);
+
+/* Loop count giving igt_create_delay_bb() a delay of about 1 second */
+uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid);
+
+/* True if ts2 is later than ts1, allowing for counter wrapping */
+bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2);
+
+#endif /* IGT_BB_FACTORY_H */
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index c52be2c..6acf2f6 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -69,6 +69,7 @@ TESTS_progs_M = \
        gem_request_retire \
        gem_reset_stats \
        gem_ringfill \
+       gem_scheduler \
        gem_set_tiling_vs_blt \
        gem_shrink \
        gem_softpin \
diff --git a/tests/gem_scheduler.c b/tests/gem_scheduler.c
new file mode 100644
index 0000000..b108343
--- /dev/null
+++ b/tests/gem_scheduler.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Derek Morton <derek.j.mor...@intel.com>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+
+/* Test description string registered through IGT_TEST_DESCRIPTION */
+IGT_TEST_DESCRIPTION("Check scheduler behaviour. Basic tests ensure independant "
+                     "batch buffers of the same priority are executed in "
+                     "submission order. Priority tests ensure higher priority "
+                     "batch buffers are executed first. Read-read tests ensure "
+                     "batch buffers with a read dependency to the same buffer "
+                     "object do not block each other. Write-write dependency "
+                     "tests ensure batch buffers with a write dependency to a "
+                     "buffer object will be executed in submission order but "
+                     "will not block execution of other independant batch "
+                     "buffers.");
+
+#define SEC_TO_NSEC (1000 * 1000 * 1000)
+
+/*
+ * Rings exercised by the tests: a printable name paired with the I915_EXEC_*
+ * id used for submission. The list ends with a NULL-name entry.
+ */
+struct ring {
+       const char *name;
+       int id;
+} rings[] = {
+       { .name = "render", .id = I915_EXEC_RENDER },
+       { .name = "bsd",    .id = I915_EXEC_BSD },
+       { .name = "blt",    .id = I915_EXEC_BLT },
+       { .name = "vebox",  .id = I915_EXEC_VEBOX },
+       { .name = NULL,     .id = 0 },
+};
+
+/* Basic test. Check batch buffers of the same priority and with no
+ * dependencies are executed in the order they are submitted.
+ *
+ * in_flight: number of batch buffers (one delay batch followed by noop
+ *            batches) submitted from the first fd. Must be at least 1.
+ * ringid:    ring all batch buffers are submitted to, e.g. I915_EXEC_RENDER.
+ * priority:  if true, set the priority parameter of the third fd's default
+ *            context to 1000 and expect its timestamp batch (TS2) to execute
+ *            before TS1; otherwise expect TS1 to execute before TS2.
+ */
+static void run_test_basic(int in_flight, int ringid, bool priority)
+{
+       int fd[3];
+       int loop;
+       drm_intel_bufmgr *bufmgr[3];
+       uint32_t *delay_buf, *ts1_buf, *ts2_buf;
+       struct intel_batchbuffer *ts1_bb, *ts2_bb;
+       struct intel_batchbuffer **in_flight_bbs;
+       uint32_t calibrated_1s;
+       drm_intel_bo *delay_bo, *ts1_bo, *ts2_bo;
+
+       /* Slot 0 always holds the delay batch, so at least one is needed */
+       igt_assert(in_flight > 0);
+       in_flight_bbs = calloc(in_flight, sizeof(*in_flight_bbs));
+       igt_assert(in_flight_bbs);
+
+       /* Need multiple i915 fd's. Scheduler will not change execution order of
+        * batch buffers from the same context.
+        */
+       for (loop = 0; loop < 3; loop++) {
+               struct intel_batchbuffer *noop_bb;
+               fd[loop] = drm_open_driver(DRIVER_INTEL);
+               igt_assert(fd[loop] >= 0);
+               bufmgr[loop] = drm_intel_bufmgr_gem_init(fd[loop], 4096);
+               igt_assert(bufmgr[loop]);
+               drm_intel_bufmgr_gem_enable_reuse(bufmgr[loop]);
+               /* Send a noop batch buffer to force any deferred
+                * initialisation */
+               noop_bb = igt_create_noop_bb(fd[loop], bufmgr[loop], ringid, 5);
+               intel_batchbuffer_flush_on_ring(noop_bb, ringid);
+               intel_batchbuffer_free(noop_bb);
+       }
+
+       if (priority) {
+               struct local_i915_gem_context_param param;
+               param.context = 0; /* Default context */
+               param.size = 0;
+               param.param = LOCAL_CONTEXT_PARAM_PRIORITY;
+               param.value = 1000;
+               gem_context_set_param(fd[2], &param);
+       }
+
+       /* Create buffer objects */
+       delay_bo = drm_intel_bo_alloc(bufmgr[0], "delay bo", 4096, 4096);
+       igt_assert(delay_bo);
+       ts1_bo = drm_intel_bo_alloc(bufmgr[1], "ts1 bo", 4096, 4096);
+       igt_assert(ts1_bo);
+       ts2_bo = drm_intel_bo_alloc(bufmgr[2], "ts2 bo", 4096, 4096);
+       igt_assert(ts2_bo);
+
+       /* Put some non zero values in the delay bo */
+       drm_intel_bo_map(delay_bo, 1);
+       delay_buf = delay_bo->virtual;
+       delay_buf[0] = 0xff;
+       drm_intel_bo_unmap(delay_bo);
+
+       calibrated_1s = igt_calibrate_delay_bb(fd[0], bufmgr[0], ringid);
+
+       /* Batch buffers to fill the ring */
+       in_flight_bbs[0] = igt_create_delay_bb(fd[0], bufmgr[0], ringid,
+                                              calibrated_1s, delay_bo);
+       for (loop = 1; loop < in_flight; loop++)
+               in_flight_bbs[loop] = igt_create_noop_bb(fd[0], bufmgr[0],
+                                                        ringid, 5);
+
+       /* Extra batch buffers in the scheduler queue */
+       ts1_bb = igt_create_timestamp_bb(fd[1], bufmgr[1], ringid, ts1_bo,
+                                        NULL, false);
+       ts2_bb = igt_create_timestamp_bb(fd[2], bufmgr[2], ringid, ts2_bo,
+                                        NULL, false);
+
+       /* Flush batchbuffers */
+       for (loop = 0; loop < in_flight; loop++)
+               intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], ringid);
+       intel_batchbuffer_flush_on_ring(ts1_bb, ringid);
+       intel_batchbuffer_flush_on_ring(ts2_bb, ringid);
+
+       /* This will not return until the bo has finished executing */
+       drm_intel_bo_map(delay_bo, 0);
+       drm_intel_bo_map(ts1_bo, 0);
+       drm_intel_bo_map(ts2_bo, 0);
+
+       delay_buf = delay_bo->virtual;
+       ts1_buf = ts1_bo->virtual;
+       ts2_buf = ts2_bo->virtual;
+
+       igt_debug("Delay Timestamp = 0x%08" PRIx32 "\n", delay_buf[2]);
+       igt_debug("TS1 Timestamp = 0x%08" PRIx32 "\n", ts1_buf[0]);
+       igt_debug("TS2 Timestamp = 0x%08" PRIx32 "\n", ts2_buf[0]);
+
+       /* buf[0] in the target buffer should be 0 if the batch buffer
+        * completed */
+       igt_assert_f(delay_buf[0] == 0,
+                    "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n",
+                    delay_buf[0]);
+
+       /* The delay batch was submitted first and must have run first */
+       igt_assert_f(igt_compare_timestamps(delay_buf[2], ts1_buf[0]),
+                    "Delay ts (0x%08" PRIx32 ") > TS1 ts (0x%08" PRIx32 ")\n",
+                    delay_buf[2], ts1_buf[0]);
+       if (priority)
+               igt_assert_f(igt_compare_timestamps(ts2_buf[0], ts1_buf[0]),
+                            "TS2 ts (0x%08" PRIx32 ") > TS1 ts (0x%08" PRIx32 ")\n",
+                            ts2_buf[0], ts1_buf[0]);
+       else
+               igt_assert_f(igt_compare_timestamps(ts1_buf[0], ts2_buf[0]),
+                            "TS1 ts (0x%08" PRIx32 ") > TS2 ts (0x%08" PRIx32 ")\n",
+                            ts1_buf[0], ts2_buf[0]);
+
+       /* Cleanup */
+       for (loop = 0; loop < in_flight; loop++)
+               intel_batchbuffer_free(in_flight_bbs[loop]);
+       intel_batchbuffer_free(ts1_bb);
+       intel_batchbuffer_free(ts2_bb);
+
+       /* Drop the read mappings taken above before releasing the bos */
+       drm_intel_bo_unmap(delay_bo);
+       drm_intel_bo_unmap(ts1_bo);
+       drm_intel_bo_unmap(ts2_bo);
+       drm_intel_bo_unreference(delay_bo);
+       drm_intel_bo_unreference(ts1_bo);
+       drm_intel_bo_unreference(ts2_bo);
+       for (loop = 0; loop < 3; loop++) {
+               drm_intel_bufmgr_destroy(bufmgr[loop]);
+               close(fd[loop]);
+       }
+       free(in_flight_bbs);
+}
+
+/* Dependency test.
+ * write=0, Submit batch buffers with read dependencies to all rings. Delay one
+ * with a long executing batch buffer. Check the others are not held up.
+ * write=1, Submit batch buffers with write dependencies to all rings. Delay 
one
+ * with a long executing batch buffer. Also submit batch buffers with no
+ * dependencies to all rings. Batch buffers with write dependencies should be
+ * executed in submission order. The batch buffers with no dependencies should
+ * not be held up.
+ */
+static void run_test_dependency(int in_flight, int ring, bool write)
+{
+       int fd[4], fd2[4];
+       int loop;
+       int prime_fd;
+       uint32_t *delay_buf, *ts_buf[4], *ts2_buf[4], *shared_buf;
+       uint32_t calibrated_1s;
+       drm_intel_bufmgr *bufmgr[4], *bufmgr2[4];
+       struct intel_batchbuffer *ts_bb[4], *ts2_bb[4], **in_flight_bbs;
+       drm_intel_bo *delay_bo, *ts_bo[4], *ts2_bo[4], *shared_bo[4];
+
+       in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *));
+       igt_assert(in_flight_bbs);
+
+       /* Need multiple i915 fd's. Scheduler will not change execution order of
+        * batch buffers from the same context.
+        */
+       for(loop=0; loop < 4; loop++) {
+               struct intel_batchbuffer *noop_bb;
+               fd[loop] = drm_open_driver(DRIVER_INTEL);
+               igt_assert(fd[loop] >= 0);
+               bufmgr[loop] = drm_intel_bufmgr_gem_init(fd[loop], 4096);
+               igt_assert(bufmgr[loop]);
+               drm_intel_bufmgr_gem_enable_reuse(bufmgr[loop]);
+               /* Send a noop batch buffer to force any deferred 
initialisation */
+               noop_bb = igt_create_noop_bb(fd[loop], bufmgr[loop], 
rings[loop].id, 5);
+               intel_batchbuffer_flush_on_ring(noop_bb, rings[loop].id);
+               intel_batchbuffer_free(noop_bb);
+               if(write) {
+                       struct intel_batchbuffer *noop_bb2;
+                       fd2[loop] = drm_open_driver(DRIVER_INTEL);
+                       igt_assert(fd2[loop] >= 0);
+                       bufmgr2[loop] = drm_intel_bufmgr_gem_init(fd2[loop], 
4096);
+                       igt_assert(bufmgr2[loop]);
+                       drm_intel_bufmgr_gem_enable_reuse(bufmgr2[loop]);
+                       /* Send a noop batch buffer to force any deferred 
initialisation */
+                       noop_bb2 = igt_create_noop_bb(fd2[loop], bufmgr2[loop], 
rings[loop].id, 5);
+                       intel_batchbuffer_flush_on_ring(noop_bb2, 
rings[loop].id);
+                       intel_batchbuffer_free(noop_bb2);
+               }
+       }
+
+       /* Create buffer objects */
+       delay_bo = drm_intel_bo_alloc(bufmgr[ring], "delay bo", 4096, 4096);
+       igt_assert(delay_bo);
+       for(loop = 0; loop < 4; loop++) {
+               ts_bo[loop] = drm_intel_bo_alloc(bufmgr[loop], "ts bo", 4096, 
4096);
+               igt_assert(ts_bo[loop]);
+               if(write) {
+                       ts2_bo[loop] = drm_intel_bo_alloc(bufmgr2[loop], "ts 
bo", 4096, 4096);
+                       igt_assert(ts2_bo[loop]);
+               }
+       }
+
+       /* Create shared buffer object */
+       shared_bo[0] = drm_intel_bo_alloc(bufmgr[0], "shared bo", 4096, 4096);
+       igt_assert(shared_bo[0]);
+
+       drm_intel_bo_gem_export_to_prime(shared_bo[0], &prime_fd);
+       for(loop = 1; loop < 4; loop++) {
+               shared_bo[loop] = 
drm_intel_bo_gem_create_from_prime(bufmgr[loop],
+                                                                    prime_fd, 
4096);
+               igt_assert(shared_bo[loop]);
+       }
+       close(prime_fd);
+
+       /* Put some non zero values in the delay and shared bo */
+       drm_intel_bo_map(delay_bo, 1);
+       delay_buf = delay_bo->virtual;
+       delay_buf[0] = 0xff;
+       drm_intel_bo_unmap(delay_bo);
+       drm_intel_bo_map(shared_bo[0], 1);
+       shared_buf = shared_bo[0]->virtual;
+       shared_buf[0] = 0xff00ff00;
+       drm_intel_bo_unmap(shared_bo[0]);
+
+       calibrated_1s = igt_calibrate_delay_bb(fd[ring], bufmgr[ring], 
rings[ring].id);
+
+       /* Batch buffers to fill the ring */
+       in_flight_bbs[0] = igt_create_delay_bb(fd[ring], bufmgr[ring], 
rings[ring].id, calibrated_1s, delay_bo);
+       for(loop = 1; loop < in_flight; loop++)
+               in_flight_bbs[loop] = igt_create_noop_bb(fd[ring], 
bufmgr[ring], rings[ring].id, 5);
+
+       for(loop = 0; loop < 4; loop++) {
+               ts_bb[loop] = igt_create_timestamp_bb(fd[loop], bufmgr[loop], 
rings[loop].id, ts_bo[loop], shared_bo[loop], write);
+               if(write)
+                       ts2_bb[loop] = igt_create_timestamp_bb(fd2[loop], 
bufmgr2[loop], rings[loop].id, ts2_bo[loop], NULL, false);
+       }
+
+       /* Flush batchbuffers */
+       for(loop = 0; loop < in_flight; loop++)
+               intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], 
rings[ring].id);
+
+       intel_batchbuffer_flush_on_ring(ts_bb[ring], rings[ring].id);
+       for(loop = 0; loop < 4; loop++)
+               if(loop != ring)
+                       intel_batchbuffer_flush_on_ring(ts_bb[loop], 
rings[loop].id);
+
+       if(write) {
+               intel_batchbuffer_flush_on_ring(ts2_bb[ring], rings[ring].id);
+               for(loop = 0; loop < 4; loop++)
+                       if(loop != ring)
+                               intel_batchbuffer_flush_on_ring(ts2_bb[loop], 
rings[loop].id);
+       }
+
+       /* This will not return until the bo has finished executing */
+       drm_intel_bo_map(delay_bo, 0);
+       delay_buf = delay_bo->virtual;
+       for(loop = 0; loop < 4; loop++) {
+               drm_intel_bo_map(ts_bo[loop], 0);
+               ts_buf[loop] = ts_bo[loop]->virtual;
+               if(write) {
+                       drm_intel_bo_map(ts2_bo[loop], 0);
+                       ts2_buf[loop] = ts2_bo[loop]->virtual;
+               }
+       }
+
+       /* buf[0] in the target buffer should be 0 if the batch buffer 
completed */
+       igt_assert_f(delay_buf[0] == 0, "delay_buf[0] expected 0x0, got 0x%" 
PRIx32 "\n", delay_buf[0]);
+
+       igt_debug("%6s delay timestamp = 0x%08" PRIx32 "\n", rings[ring].name, 
delay_buf[2]);
+       igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n", rings[0].name, 
ts_buf[0][0]);
+       igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n", rings[1].name, 
ts_buf[1][0]);
+       igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n", rings[2].name, 
ts_buf[2][0]);
+       igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n", rings[3].name, 
ts_buf[3][0]);
+       if(write) {
+               igt_debug("Independant batch buffers\n");
+               igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n", 
rings[0].name, ts2_buf[0][0]);
+               igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n", 
rings[1].name, ts2_buf[1][0]);
+               igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n", 
rings[2].name, ts2_buf[2][0]);
+               igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n", 
rings[3].name, ts2_buf[3][0]);
+       }
+
+       for(loop = 0; loop < 4; loop++) {
+               if(loop != ring) {
+                       if(write) {
+                               /* Write dependency, delayed ring should run 
first */
+                               
igt_assert_f(igt_compare_timestamps(ts_buf[ring][0], ts_buf[loop][0]),
+                                            "%s ran before %s - 0x%08" PRIx32 
" vs 0x%08" PRIx32 "\n",
+                                            rings[loop].name, rings[ring].name,
+                                            ts_buf[loop][0], ts_buf[ring][0]);
+                               /* Second bb without dependency should run 
first */
+                               
igt_assert_f(igt_compare_timestamps(ts2_buf[loop][0], ts_buf[loop][0]),
+                                            "(%s) independant bb was held up - 
0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
+                                            rings[loop].name, ts_buf[loop][0], 
ts2_buf[loop][0]);
+                       }
+                       else
+                               /* Read dependency, delayed ring should run 
last */
+                               
igt_assert_f(igt_compare_timestamps(ts_buf[loop][0], ts_buf[ring][0]),
+                                            "%s ran after %s - 0x%08" PRIx32 " 
vs 0x%08" PRIx32 "\n",
+                                            rings[loop].name, rings[ring].name,
+                                            ts_buf[loop][0], ts_buf[ring][0]);
+               }
+       }
+
+       /* Cleanup */
+       for(loop = 0; loop < in_flight; loop++)
+               intel_batchbuffer_free(in_flight_bbs[loop]);
+
+       for(loop = 0; loop < 4; loop++) {
+               intel_batchbuffer_free(ts_bb[loop]);
+               drm_intel_bo_unreference(ts_bo[loop]);
+               drm_intel_bo_unreference(shared_bo[loop]);
+               if(write) {
+                       intel_batchbuffer_free(ts2_bb[loop]);
+                       drm_intel_bo_unreference(ts2_bo[loop]);
+               }
+       }
+
+       drm_intel_bo_unreference(delay_bo);
+
+       for(loop = 0; loop < 4; loop++) {
+               drm_intel_bufmgr_destroy(bufmgr[loop]);
+               close(fd[loop]);
+               if(write) {
+                       drm_intel_bufmgr_destroy(bufmgr2[loop]);
+                       close(fd2[loop]);
+               }
+       }
+
+       free(in_flight_bbs);
+}
+
+/*
+ * Test entry point.
+ *
+ * The fixture reads the number of batch buffers that the scheduler will
+ * queue in the HW from the i915_scheduler_min_flying debugfs file and skips
+ * if that file cannot be opened. Four subtests are then generated for every
+ * entry in rings[]: basic, priority, read-read and write-write.
+ */
+igt_main
+{
+       int loop;
+       int in_flight;
+
+       igt_fixture {
+               int debug_fd;
+               int l;
+               /* %x needs an unsigned int target, so parse hex separately */
+               unsigned int hex_val;
+               /* Room for "0x", two hex digits, a newline and the
+                * terminator. A smaller buffer truncates values such as
+                * "0x10" to "0x1".
+                */
+               char buf[8];
+
+               /* Get nbr of batch buffers that the scheduler will queue in the
+                * HW. If this debugfs file does not exist there is no scheduler
+                * so skip the test.
+                */
+               debug_fd = igt_debugfs_open("i915_scheduler_min_flying", O_RDONLY);
+               igt_skip_on(debug_fd == -1);
+               l = read(debug_fd, buf, sizeof(buf) - 1);
+               igt_assert(l > 0);
+               igt_assert(l < sizeof(buf));
+               buf[l] = '\0';
+
+               /* Accept either a "0x" prefixed hex value or a decimal one */
+               if (sscanf(buf, "0x%2x", &hex_val) == 1)
+                       in_flight = hex_val;
+               else
+                       igt_assert_f(sscanf(buf, "%2d", &in_flight) == 1,
+                                    "Error reading from i915_scheduler_min_flying\n");
+               close(debug_fd);
+               igt_debug("in flight = %d\n", in_flight);
+       }
+
+       /* The basic tests are passed the ring id */
+       for (loop = 0; rings[loop].name != NULL; loop++)
+               igt_subtest_f("%s-basic", rings[loop].name) {
+                       run_test_basic(in_flight, rings[loop].id, false);
+               }
+
+       for (loop = 0; rings[loop].name != NULL; loop++)
+               igt_subtest_f("%s-priority", rings[loop].name) {
+                       run_test_basic(in_flight, rings[loop].id, true);
+               }
+
+       /* The dependency tests are passed the index into rings[], not the id */
+       for (loop = 0; rings[loop].name != NULL; loop++)
+               igt_subtest_f("%s-read-read", rings[loop].name) {
+                       run_test_dependency(in_flight, loop, false);
+               }
+
+       for (loop = 0; rings[loop].name != NULL; loop++)
+               igt_subtest_f("%s-write-write", rings[loop].name) {
+                       run_test_dependency(in_flight, loop, true);
+               }
+
+}
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to