Re: [Intel-gfx] [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Measure timeslice distribution when oversaturated

2020-12-15 Thread Tvrtko Ursulin



On 15/12/2020 09:47, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2020-12-15 09:41:09)


On 14/12/2020 20:44, Chris Wilson wrote:

Check that timeslices for an oversaturated system (where there is more
work than can be supported by a single engine) are evenly distributed
between the clients.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
   tests/i915/gem_exec_schedule.c | 179 +
   1 file changed, 179 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index f23d63ac3..263f1dd78 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2516,6 +2516,154 @@ static void measure_semaphore_power(int i915)
   rapl_close();
   }
   
+static int read_timestamp_frequency(int i915)

+{
+ int value = 0;
+ drm_i915_getparam_t gp = {
+ .value = &value,
+ .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+ };
+ ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+ return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+ return (x + y - 1) / y;
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+ return div64_u64_round_up(ticks * NSEC_PER_SEC,
+   read_timestamp_frequency(i915));
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+ const uint32_t *a = A, *b = B;
+
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static uint32_t read_ctx_timestamp(int i915,
+uint32_t ctx,
+const struct intel_execution_engine2 *e)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = gem_create(i915, 4096),
+ .offset = 32 << 20,
+ .relocs_ptr = to_user_pointer(&reloc),
+ .relocation_count = 1,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = e->flags,
+ .rsvd1 = ctx,
+ };
+#define RUNTIME (base + 0x3a8)
+ uint32_t *map, *cs;
+ uint32_t ts;
+
+ igt_require(base);
+
+ cs = map = gem_mmap__device_coherent(i915, obj.handle,
+  0, 4096, PROT_WRITE);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = RUNTIME;
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj.handle;
+ reloc.presumed_offset = obj.offset;
+ reloc.offset = offset_in_page(cs);
+ reloc.delta = 4000;
+ *cs++ = obj.offset + 4000;
+ *cs++ = obj.offset >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ gem_execbuf(i915, &execbuf);
+ gem_sync(i915, obj.handle);
+ gem_close(i915, obj.handle);
+
+ ts = map[1000];
+ munmap(map, 4096);
+
+ return ts;
+}
+
+static void fairslice(int i915,
+   const struct intel_execution_engine2 *e,
+   unsigned long flags,
+   int duration)
+{
+ const double timeslice_duration_ns = 1e6;
+ igt_spin_t *spin = NULL;
+ double threshold;
+ uint32_t ctx[3];
+ uint32_t ts[3];
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+ ctx[i] = gem_context_clone_with_engines(i915, 0);
+ if (spin == NULL) {
+ spin = __igt_spin_new(i915,
+   .ctx = ctx[i],
+   .engine = e->flags,
+   .flags = flags);
+ } else {
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffer_count = 1,
+ .buffers_ptr = 
to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
+ .flags = e->flags,
+ .rsvd1 = ctx[i],
+ };
+ gem_execbuf(i915, &eb);
+ }
+ }
+
+ sleep(duration); /* over the course of many timeslices */
+
+ igt_assert(gem_bo_busy(i915, spin->handle));
+ igt_spin_end(spin);
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+ ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+ gem_context_destroy(i915, ctx[i]);
+ igt_spin_free(i915, spin);
+
+ /*
+  * If we imagine that the timeslices are randomly distributed to
+  * the virtual engines, we would expect the variation to be modelled
+  * by a drunken walk; ergo sqrt(num_timeslices).
+  */
+ threshold = sqrt(1e9 * duration / timeslice_duration_ns);
+ threshold *= timeslice_duration_ns;
+ threshold *= 2; /* CI safety factor before crying wolf */
+
+ qsort(ts, 3, sizeof(*ts), cmp_u32);
+ 

Re: [Intel-gfx] [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Measure timeslice distribution when oversaturated

2020-12-15 Thread Chris Wilson
Quoting Tvrtko Ursulin (2020-12-15 09:41:09)
> 
> On 14/12/2020 20:44, Chris Wilson wrote:
> > Check that timeslices for an oversaturated system (where there is more
> > work than can be supported by a single engine) are evenly distributed
> > between the clients.
> > 
> > Signed-off-by: Chris Wilson 
> > Cc: Tvrtko Ursulin 
> > ---
> >   tests/i915/gem_exec_schedule.c | 179 +
> >   1 file changed, 179 insertions(+)
> > 
> > diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> > index f23d63ac3..263f1dd78 100644
> > --- a/tests/i915/gem_exec_schedule.c
> > +++ b/tests/i915/gem_exec_schedule.c
> > @@ -2516,6 +2516,154 @@ static void measure_semaphore_power(int i915)
> >   rapl_close();
> >   }
> >   
> > +static int read_timestamp_frequency(int i915)
> > +{
> > + int value = 0;
> > + drm_i915_getparam_t gp = {
> > + .value = &value,
> > + .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
> > + };
> > + ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
> > + return value;
> > +}
> > +
> > +static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
> > +{
> > + return (x + y - 1) / y;
> > +}
> > +
> > +static uint64_t ticks_to_ns(int i915, uint64_t ticks)
> > +{
> > + return div64_u64_round_up(ticks * NSEC_PER_SEC,
> > +   read_timestamp_frequency(i915));
> > +}
> > +
> > +static int cmp_u32(const void *A, const void *B)
> > +{
> > + const uint32_t *a = A, *b = B;
> > +
> > + if (*a < *b)
> > + return -1;
> > + else if (*a > *b)
> > + return 1;
> > + else
> > + return 0;
> > +}
> > +
> > +static uint32_t read_ctx_timestamp(int i915,
> > +uint32_t ctx,
> > +const struct intel_execution_engine2 *e)
> > +{
> > + const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> > + const uint32_t base = gem_engine_mmio_base(i915, e->name);
> > + struct drm_i915_gem_relocation_entry reloc;
> > + struct drm_i915_gem_exec_object2 obj = {
> > + .handle = gem_create(i915, 4096),
> > + .offset = 32 << 20,
> > + .relocs_ptr = to_user_pointer(&reloc),
> > + .relocation_count = 1,
> > + };
> > + struct drm_i915_gem_execbuffer2 execbuf = {
> > + .buffers_ptr = to_user_pointer(&obj),
> > + .buffer_count = 1,
> > + .flags = e->flags,
> > + .rsvd1 = ctx,
> > + };
> > +#define RUNTIME (base + 0x3a8)
> > + uint32_t *map, *cs;
> > + uint32_t ts;
> > +
> > + igt_require(base);
> > +
> > + cs = map = gem_mmap__device_coherent(i915, obj.handle,
> > +  0, 4096, PROT_WRITE);
> > +
> > + *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
> > + *cs++ = RUNTIME;
> > + memset(&reloc, 0, sizeof(reloc));
> > + reloc.target_handle = obj.handle;
> > + reloc.presumed_offset = obj.offset;
> > + reloc.offset = offset_in_page(cs);
> > + reloc.delta = 4000;
> > + *cs++ = obj.offset + 4000;
> > + *cs++ = obj.offset >> 32;
> > +
> > + *cs++ = MI_BATCH_BUFFER_END;
> > +
> > + gem_execbuf(i915, &execbuf);
> > + gem_sync(i915, obj.handle);
> > + gem_close(i915, obj.handle);
> > +
> > + ts = map[1000];
> > + munmap(map, 4096);
> > +
> > + return ts;
> > +}
> > +
> > +static void fairslice(int i915,
> > +   const struct intel_execution_engine2 *e,
> > +   unsigned long flags,
> > +   int duration)
> > +{
> > + const double timeslice_duration_ns = 1e6;
> > + igt_spin_t *spin = NULL;
> > + double threshold;
> > + uint32_t ctx[3];
> > + uint32_t ts[3];
> > +
> > + for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
> > + ctx[i] = gem_context_clone_with_engines(i915, 0);
> > + if (spin == NULL) {
> > + spin = __igt_spin_new(i915,
> > +   .ctx = ctx[i],
> > +   .engine = e->flags,
> > +   .flags = flags);
> > + } else {
> > + struct drm_i915_gem_execbuffer2 eb = {
> > + .buffer_count = 1,
> > + .buffers_ptr = 
> > to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
> > + .flags = e->flags,
> > + .rsvd1 = ctx[i],
> > + };
> > + gem_execbuf(i915, &eb);
> > + }
> > + }
> > +
> > + sleep(duration); /* over the course of many timeslices */
> > +
> > + igt_assert(gem_bo_busy(i915, spin->handle));
> > + igt_spin_end(spin);
> > + for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> > + ts[i] = read_ctx_timestamp(i915, ctx[i], e);
> > +
> > + for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> > +  

Re: [Intel-gfx] [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Measure timeslice distribution when oversaturated

2020-12-15 Thread Tvrtko Ursulin



On 14/12/2020 20:44, Chris Wilson wrote:

Check that timeslices for an oversaturated system (where there is more
work than can be supported by a single engine) are evenly distributed
between the clients.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
  tests/i915/gem_exec_schedule.c | 179 +
  1 file changed, 179 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index f23d63ac3..263f1dd78 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2516,6 +2516,154 @@ static void measure_semaphore_power(int i915)
rapl_close();
  }
  
+static int read_timestamp_frequency(int i915)

+{
+   int value = 0;
+   drm_i915_getparam_t gp = {
+   .value = &value,
+   .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+   };
+   ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+   return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+   return (x + y - 1) / y;
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+   return div64_u64_round_up(ticks * NSEC_PER_SEC,
+ read_timestamp_frequency(i915));
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+   const uint32_t *a = A, *b = B;
+
+   if (*a < *b)
+   return -1;
+   else if (*a > *b)
+   return 1;
+   else
+   return 0;
+}
+
+static uint32_t read_ctx_timestamp(int i915,
+  uint32_t ctx,
+  const struct intel_execution_engine2 *e)
+{
+   const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+   const uint32_t base = gem_engine_mmio_base(i915, e->name);
+   struct drm_i915_gem_relocation_entry reloc;
+   struct drm_i915_gem_exec_object2 obj = {
+   .handle = gem_create(i915, 4096),
+   .offset = 32 << 20,
+   .relocs_ptr = to_user_pointer(&reloc),
+   .relocation_count = 1,
+   };
+   struct drm_i915_gem_execbuffer2 execbuf = {
+   .buffers_ptr = to_user_pointer(&obj),
+   .buffer_count = 1,
+   .flags = e->flags,
+   .rsvd1 = ctx,
+   };
+#define RUNTIME (base + 0x3a8)
+   uint32_t *map, *cs;
+   uint32_t ts;
+
+   igt_require(base);
+
+   cs = map = gem_mmap__device_coherent(i915, obj.handle,
+0, 4096, PROT_WRITE);
+
+   *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+   *cs++ = RUNTIME;
+   memset(&reloc, 0, sizeof(reloc));
+   reloc.target_handle = obj.handle;
+   reloc.presumed_offset = obj.offset;
+   reloc.offset = offset_in_page(cs);
+   reloc.delta = 4000;
+   *cs++ = obj.offset + 4000;
+   *cs++ = obj.offset >> 32;
+
+   *cs++ = MI_BATCH_BUFFER_END;
+
+   gem_execbuf(i915, &execbuf);
+   gem_sync(i915, obj.handle);
+   gem_close(i915, obj.handle);
+
+   ts = map[1000];
+   munmap(map, 4096);
+
+   return ts;
+}
+
+static void fairslice(int i915,
+ const struct intel_execution_engine2 *e,
+ unsigned long flags,
+ int duration)
+{
+   const double timeslice_duration_ns = 1e6;
+   igt_spin_t *spin = NULL;
+   double threshold;
+   uint32_t ctx[3];
+   uint32_t ts[3];
+
+   for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+   ctx[i] = gem_context_clone_with_engines(i915, 0);
+   if (spin == NULL) {
+   spin = __igt_spin_new(i915,
+ .ctx = ctx[i],
+ .engine = e->flags,
+ .flags = flags);
+   } else {
+   struct drm_i915_gem_execbuffer2 eb = {
+   .buffer_count = 1,
+   .buffers_ptr = 
to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
+   .flags = e->flags,
+   .rsvd1 = ctx[i],
+   };
+   gem_execbuf(i915, &eb);
+   }
+   }
+
+   sleep(duration); /* over the course of many timeslices */
+
+   igt_assert(gem_bo_busy(i915, spin->handle));
+   igt_spin_end(spin);
+   for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+   ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+
+   for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+   gem_context_destroy(i915, ctx[i]);
+   igt_spin_free(i915, spin);
+
+   /*
+* If we imagine that the timeslices are randomly distributed to
+* the virtual engines, we would expect the variation to be modelled
+* by a drunken walk; ergo sqrt(num_timeslices).
+*/
+   threshold = sqrt(1e9 * duration / timeslice_duration_ns);
+   threshold *= timeslice_duration_ns;
+   threshold