[Intel-gfx] [PATCH] drm/i915/execlists: Tweak virtual unsubmission

2019-10-13 Thread Chris Wilson
Since commit e2144503bf3b ("drm/i915: Prevent bonded requests from
overtaking each other on preemption") we have restricted requests to run
on their chosen engine across preemption events. We can take this
restriction into account to know that we will want to resubmit those
requests onto the same physical engine, and so can shortcircuit the
virtual engine selection process and keep the request on the same
engine during unwind.

References: e2144503bf3b ("drm/i915: Prevent bonded requests from overtaking 
each other on preemption")
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +++---
 drivers/gpu/drm/i915/i915_request.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e6bf633b48d5..03732e3f5ec7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -895,7 +895,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe_reverse(rq, rn,
 &engine->active.requests,
 sched.link) {
-   struct intel_engine_cs *owner;
 
if (i915_request_completed(rq))
continue; /* XXX */
@@ -910,8 +909,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 * engine so that it can be moved across onto another physical
 * engine as load dictates.
 */
-   owner = rq->hw_context->engine;
-   if (likely(owner == engine)) {
+   if (likely(rq->execution_mask == engine->mask)) {
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq_prio(rq) != prio) {
prio = rq_prio(rq);
@@ -922,6 +920,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_move(&rq->sched.link, pl);
active = rq;
} else {
+   struct intel_engine_cs *owner = rq->hw_context->engine;
+
/*
 * Decouple the virtual breadcrumb before moving it
 * back to the virtual engine -- we don't want the
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 437f9fc6282e..b8a54572a4f8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -649,6 +649,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq->gem_context = ce->gem_context;
rq->engine = ce->engine;
rq->ring = ce->ring;
+   rq->execution_mask = ce->engine->mask;
 
rcu_assign_pointer(rq->timeline, tl);
rq->hwsp_seqno = tl->hwsp_seqno;
@@ -671,7 +672,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq->batch = NULL;
rq->capture_list = NULL;
rq->flags = 0;
-   rq->execution_mask = ALL_ENGINES;
 
INIT_LIST_HEAD(&rq->execute_cb);
 
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/execlists: Assert tasklet is locked for process_csb()

2019-10-13 Thread Chris Wilson
We rely on only the tasklet being allowed to call into process_csb(), so
assert that is locked when we do. As the tasklet uses a simple bitlock,
there is no strong lockdep checking so we must make do with a plain
assertion that the tasklet is running and assume that we are the
tasklet!

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 1 +
 drivers/gpu/drm/i915/i915_gem.h | 5 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 8be9e69d5718..ab20433182d1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1984,6 +1984,7 @@ static void process_csb(struct intel_engine_cs *engine)
u8 head, tail;
 
GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
+   GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet));
 
/*
 * Note that csb_write, csb_status may be either in HWSP or mmio.
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index db20d2b0842b..f6f9675848b8 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -86,6 +86,11 @@ static inline void tasklet_lock(struct tasklet_struct *t)
cpu_relax();
 }
 
+static inline bool tasklet_is_locked(const struct tasklet_struct *t)
+{
+   return test_bit(TASKLET_STATE_RUN, &t->state);
+}
+
 static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
 {
if (!atomic_fetch_inc(>count))
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/execlists: Clear semaphore immediately upon ELSP promotion

2019-10-13 Thread Chris Wilson
There is no significance to our delay before clearing the semaphore the
engine is waiting on, so release it as soon as we acknowledge the CS
update following our preemption request.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 16b878d35814..32a30d36eb7b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1903,6 +1903,9 @@ static void process_csb(struct intel_engine_cs *engine)
else
promote = gen8_csb_parse(execlists, buf + 2 * head);
if (promote) {
+   if (!inject_preempt_hang(execlists))
+   ring_set_paused(engine, 0);
+
/* cancel old inflight, prepare for switch */
trace_ports(execlists, "preempted", execlists->active);
while (*execlists->active)
@@ -1919,9 +1922,6 @@ static void process_csb(struct intel_engine_cs *engine)
if (enable_timeslice(execlists))
mod_timer(&execlists->timer, jiffies + 1);
 
-   if (!inject_preempt_hang(execlists))
-   ring_set_paused(engine, 0);
-
WRITE_ONCE(execlists->pending[0], NULL);
} else {
GEM_BUG_ON(!*execlists->active);
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH i-g-t 1/2] lib: Generalise rapl interface

2019-10-13 Thread Chris Wilson
We can use our existing rapl interface that monitors gpu power, to also
sample the other rapl domains such as package, cores and ram.

Signed-off-by: Chris Wilson 
Cc: Andi Shyti 
---
 lib/Makefile.sources|  4 +-
 lib/igt_gpu_power.c | 87 -
 lib/igt_rapl.c  | 69 +++
 lib/{igt_gpu_power.h => igt_rapl.h} | 69 ---
 lib/meson.build |  2 +-
 tests/i915/gem_exec_schedule.c  | 39 -
 tests/i915/gem_exec_whisper.c   | 15 ++---
 7 files changed, 155 insertions(+), 130 deletions(-)
 delete mode 100644 lib/igt_gpu_power.c
 create mode 100644 lib/igt_rapl.c
 rename lib/{igt_gpu_power.h => igt_rapl.h} (52%)

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index cf094ab89..34e0c012d 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -33,8 +33,6 @@ lib_source_list = \
igt_edid.h  \
igt_eld.c   \
igt_eld.h   \
-   igt_gpu_power.c \
-   igt_gpu_power.h \
igt_gt.c\
igt_gt.h\
igt_gvt.c   \
@@ -49,6 +47,8 @@ lib_source_list = \
igt_primes.h\
igt_rand.c  \
igt_rand.h  \
+   igt_rapl.c  \
+   igt_rapl.h  \
igt_rc.h\
igt_stats.c \
igt_stats.h \
diff --git a/lib/igt_gpu_power.c b/lib/igt_gpu_power.c
deleted file mode 100644
index 7092b75b3..0
--- a/lib/igt_gpu_power.c
+++ /dev/null
@@ -1,87 +0,0 @@
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "igt_gpu_power.h"
-#include "igt_perf.h"
-#include "igt_sysfs.h"
-
-struct rapl {
-   uint64_t power, type;
-   double scale;
-};
-
-static int rapl_parse(struct rapl *r)
-{
-   locale_t locale, oldlocale;
-   bool result;
-   int dir;
-
-   memset(r, 0, sizeof(*r));
-
-   dir = open("/sys/devices/power", O_RDONLY);
-   if (dir < 0)
-   return -errno;
-
-   /* Replace user environment with plain C to match kernel format */
-   locale = newlocale(LC_ALL, "C", 0);
-   oldlocale = uselocale(locale);
-
-   result = true;
-   result &= igt_sysfs_scanf(dir, "type",
- "%"PRIu64, &r->type) == 1;
-   result &= igt_sysfs_scanf(dir, "events/energy-gpu",
- "event=%"PRIx64, &r->power) == 1;
-   result &= igt_sysfs_scanf(dir, "events/energy-gpu.scale",
- "%lf", &r->scale) == 1;
-
-   uselocale(oldlocale);
-   freelocale(locale);
-
-   close(dir);
-
-   if (!result)
-   return -EINVAL;
-
-   if (isnan(r->scale) || !r->scale)
-   return -ERANGE;
-
-   return 0;
-}
-
-int gpu_power_open(struct gpu_power *power)
-{
-   struct rapl r;
-
-   power->fd = rapl_parse(&r);
-   if (power->fd < 0)
-   goto err;
-
-   power->fd = igt_perf_open(r.type, r.power);
-   if (power->fd < 0) {
-   power->fd = -errno;
-   goto err;
-   }
-
-   power->scale = r.scale;
-
-   return 0;
-
-err:
-   errno = 0;
-   return power->fd;
-}
-
-bool gpu_power_read(struct gpu_power *power, struct gpu_power_sample *s)
-{
-   return read(power->fd, s, sizeof(*s)) == sizeof(*s);
-}
-
-void gpu_power_close(struct gpu_power *power)
-{
-   close(power->fd);
-}
diff --git a/lib/igt_rapl.c b/lib/igt_rapl.c
new file mode 100644
index 0..03e492260
--- /dev/null
+++ b/lib/igt_rapl.c
@@ -0,0 +1,69 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "igt_perf.h"
+#include "igt_rapl.h"
+#include "igt_sysfs.h"
+
+static int rapl_parse(struct rapl *r, const char *str)
+{
+   locale_t locale, oldlocale;
+   bool result = true;
+   char buf[128];
+   int dir;
+
+   memset(r, 0, sizeof(*r));
+
+   dir = open("/sys/devices/power", O_RDONLY);
+   if (dir < 0)
+   return -errno;
+
+   /* Replace user environment with plain C to match kernel format */
+   locale = newlocale(LC_ALL, "C", 0);
+   oldlocale = uselocale(locale);
+
+   result &= igt_sysfs_scanf(dir, "type", "%"PRIu64, &r->type) == 1;
+
+   snprintf(buf, sizeof(buf), "events/energy-%s", str);
+   result &= igt_sysfs_scanf(dir, buf, "event=%"PRIx64, &r->power) == 1;
+
+   snprintf(buf, sizeof(buf), "events/energy-%s.scale", str);
+   result &= igt_sysfs_scan

[Intel-gfx] [PATCH i-g-t 2/2] overlay: Show total package power

2019-10-13 Thread Chris Wilson
Add the total package power after the GPU package power, for reference.

Signed-off-by: Chris Wilson 
---
 overlay/overlay.c | 18 ++
 overlay/power.c   | 88 +++
 overlay/power.h   | 21 +--
 3 files changed, 59 insertions(+), 68 deletions(-)

diff --git a/overlay/overlay.c b/overlay/overlay.c
index eae5ddfa8..dd4fca29e 100644
--- a/overlay/overlay.c
+++ b/overlay/overlay.c
@@ -609,12 +609,12 @@ static void show_gpu_freq(struct overlay_context *ctx, 
struct overlay_gpu_freq *
}
 
if (has_power) {
-   chart_add_sample(&gf->power_chart, gf->power.power_mW);
-   if (gf->power.new_sample) {
-   if (gf->power.power_mW > gf->power_max)
-   gf->power_max = gf->power.power_mW;
+   chart_add_sample(&gf->power_chart, gf->power.gpu.power_mW);
+   if (gf->power.gpu.new_sample) {
+   if (gf->power.gpu.power_mW > gf->power_max)
+   gf->power_max = gf->power.gpu.power_mW;
chart_set_range(&gf->power_chart, 0, gf->power_max);
-   gf->power.new_sample = 0;
+   gf->power.gpu.new_sample = 0;
}
chart_draw(&gf->power_chart, ctx->cr);
}
@@ -700,8 +700,14 @@ static void show_gpu_freq(struct overlay_context *ctx, 
struct overlay_gpu_freq *
}
 
if (has_power) {
-   sprintf(buf, "Power: %llumW", (long long 
unsigned)gf->power.power_mW);
cairo_set_source_rgba(ctx->cr, 1, 1, 1, 1);
+
+   sprintf(buf, "Power: %llumW", (long long 
unsigned)gf->power.gpu.power_mW);
+   cairo_move_to(ctx->cr, PAD, y);
+   cairo_show_text(ctx->cr, buf);
+   y += 14;
+
+   sprintf(buf, "Package: %llumW", (long long 
unsigned)gf->power.pkg.power_mW);
cairo_move_to(ctx->cr, PAD, y);
cairo_show_text(ctx->cr, buf);
y += 14;
diff --git a/overlay/power.c b/overlay/power.c
index 0f99e2a4a..76fafea91 100644
--- a/overlay/power.c
+++ b/overlay/power.c
@@ -77,15 +77,6 @@ static uint64_t filename_to_u64(const char *filename, int 
base)
return strtoull(b, NULL, base);
 }
 
-static uint64_t debugfs_file_to_u64(const char *name)
-{
-   char buf[1024];
-
-   snprintf(buf, sizeof(buf), "%s/%s", debugfs_dri_path, name);
-
-   return filename_to_u64(buf, 0);
-}
-
 static uint64_t rapl_type_id(void)
 {
return filename_to_u64("/sys/devices/power/type", 10);
@@ -96,6 +87,11 @@ static uint64_t rapl_gpu_power(void)
return filename_to_u64("/sys/devices/power/events/energy-gpu", 0);
 }
 
+static uint64_t rapl_pkg_power(void)
+{
+   return filename_to_u64("/sys/devices/power/events/energy-pkg", 0);
+}
+
 static double filename_to_double(const char *filename)
 {
char *oldlocale;
@@ -117,70 +113,58 @@ static double rapl_gpu_power_scale(void)
return filename_to_double("/sys/devices/power/events/energy-gpu.scale");
 }
 
-int power_init(struct power *power)
+static double rapl_pkg_power_scale(void)
 {
-   uint64_t val;
+   return filename_to_double("/sys/devices/power/events/energy-pkg.scale");
+}
 
+int power_init(struct power *power)
+{
memset(power, 0, sizeof(*power));
 
-   power->fd = igt_perf_open(rapl_type_id(), rapl_gpu_power());
-   if (power->fd >= 0) {
-   power->rapl_scale = rapl_gpu_power_scale();
-
-   if (power->rapl_scale != NAN) {
-   power->rapl_scale *= 1e3; /* from nano to micro */
-   return 0;
-   }
-   }
+   power->gpu.fd = igt_perf_open(rapl_type_id(), rapl_gpu_power());
+   if (power->gpu.fd < 0)
+   return power->error = ENOENT;
+   power->gpu.scale = rapl_gpu_power_scale() * 1e3; /* to milli */
 
-   val = debugfs_file_to_u64("i915_energy_uJ");
-   if (val == 0)
-   return power->error = EINVAL;
+   power->pkg.fd = igt_perf_open(rapl_type_id(), rapl_pkg_power());
+   power->pkg.scale = rapl_pkg_power_scale() * 1e3; /* to milli */
 
return 0;
 }
 
-static uint64_t clock_ms_to_u64(void)
+static void __power_update(struct power_domain *pd, int count)
 {
-   struct timespec tv;
+   struct power_stat *s = &pd->stat[count & 1];
+   struct power_stat *d = &pd->stat[(count + 1) & 1];
+   uint64_t data[2], d_time;
+   int len;
 
-   if (clock_gettime(CLOCK_MONOTONIC, &tv) < 0)
-   return 0;
+   len = read(pd->fd, data, sizeof(data));
+   if (len != sizeof(data))
+   return;
 
-   return (uint64_t)tv.tv_sec *

[Intel-gfx] [PATCH v2] drm/i915/selftests: Fixup naked 64b divide

2019-10-13 Thread Chris Wilson
drivers/gpu/drm/i915/intel_memory_region.o: in function `igt_mock_contiguous':
drivers/gpu/drm/i915/selftests/intel_memory_region.c:166: undefined reference 
to `__umoddi3'

v2: promote target to u64 for consistency across all builds

Reported-by: kbuild test robot 
Fixes: 2f0b97ca0211 ("drm/i915/region: support contiguous allocations")
Signed-off-by: Chris Wilson 
Cc: Matthew Auld 
---
 drivers/gpu/drm/i915/selftests/intel_memory_region.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c 
b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 4e44c81e8e5b..56091e7e599e 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -128,9 +128,9 @@ static int igt_mock_contiguous(void *arg)
LIST_HEAD(objects);
LIST_HEAD(holes);
I915_RND_STATE(prng);
-   resource_size_t target;
resource_size_t total;
resource_size_t min;
+   u64 target;
int err = 0;
 
total = resource_size(&mem->region);
@@ -163,7 +163,9 @@ static int igt_mock_contiguous(void *arg)
igt_object_release(obj);
 
/* Internal fragmentation should not bleed into the object size */
-   target = round_up(prandom_u32_state(&prng) % total, PAGE_SIZE);
+   target = i915_prandom_u64_state(&prng);
+   div64_u64_rem(target, total, &target);
+   target = round_up(target, PAGE_SIZE);
target = max_t(u64, PAGE_SIZE, target);
 
obj = igt_object_create(mem, &objects, target,
@@ -172,8 +174,8 @@ static int igt_mock_contiguous(void *arg)
return PTR_ERR(obj);
 
if (obj->base.size != target) {
-   pr_err("%s obj->base.size(%llx) != target(%llx)\n", __func__,
-  (u64)obj->base.size, (u64)target);
+   pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__,
+  obj->base.size, target);
err = -EINVAL;
goto err_close_objects;
}
@@ -236,7 +238,7 @@ static int igt_mock_contiguous(void *arg)
I915_BO_ALLOC_CONTIGUOUS);
if (should_fail != IS_ERR(obj)) {
pr_err("%s target allocation(%llx) mismatch\n",
-  __func__, (u64)target);
+  __func__, target);
err = -EINVAL;
goto err_close_objects;
}
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/perf: Avoid polluting the i915_oa_config with error pointers

2019-10-13 Thread Chris Wilson
Use a local variable to track the allocation errors to avoid polluting
the struct and keep the free simple.

Reported-by: kbuild test robot 
Reported-by: Dan Carpenter 
Signed-off-by: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c | 52 +++-
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b4e2332d35cb..366580701ba2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -384,12 +384,9 @@ void i915_oa_config_release(struct kref *ref)
struct i915_oa_config *oa_config =
container_of(ref, typeof(*oa_config), ref);
 
-   if (!PTR_ERR(oa_config->flex_regs))
-   kfree(oa_config->flex_regs);
-   if (!PTR_ERR(oa_config->b_counter_regs))
-   kfree(oa_config->b_counter_regs);
-   if (!PTR_ERR(oa_config->mux_regs))
-   kfree(oa_config->mux_regs);
+   kfree(oa_config->flex_regs);
+   kfree(oa_config->b_counter_regs);
+   kfree(oa_config->mux_regs);
 
kfree_rcu(oa_config, rcu);
 }
@@ -3631,6 +3628,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, 
void *data,
struct i915_perf *perf = &to_i915(dev)->perf;
struct drm_i915_perf_oa_config *args = data;
struct i915_oa_config *oa_config, *tmp;
+   struct i915_oa_reg *regs;
int err, id;
 
if (!perf->i915) {
@@ -3676,30 +3674,30 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, 
void *data,
memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));
 
oa_config->mux_regs_len = args->n_mux_regs;
-   oa_config->mux_regs =
-   alloc_oa_regs(perf,
- perf->ops.is_valid_mux_reg,
- u64_to_user_ptr(args->mux_regs_ptr),
- args->n_mux_regs);
+   regs = alloc_oa_regs(perf,
+perf->ops.is_valid_mux_reg,
+u64_to_user_ptr(args->mux_regs_ptr),
+args->n_mux_regs);
 
-   if (IS_ERR(oa_config->mux_regs)) {
+   if (IS_ERR(regs)) {
DRM_DEBUG("Failed to create OA config for mux_regs\n");
-   err = PTR_ERR(oa_config->mux_regs);
+   err = PTR_ERR(regs);
goto reg_err;
}
+   oa_config->mux_regs = regs;
 
oa_config->b_counter_regs_len = args->n_boolean_regs;
-   oa_config->b_counter_regs =
-   alloc_oa_regs(perf,
- perf->ops.is_valid_b_counter_reg,
- u64_to_user_ptr(args->boolean_regs_ptr),
- args->n_boolean_regs);
+   regs = alloc_oa_regs(perf,
+perf->ops.is_valid_b_counter_reg,
+u64_to_user_ptr(args->boolean_regs_ptr),
+args->n_boolean_regs);
 
-   if (IS_ERR(oa_config->b_counter_regs)) {
+   if (IS_ERR(regs)) {
DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
-   err = PTR_ERR(oa_config->b_counter_regs);
+   err = PTR_ERR(regs);
goto reg_err;
}
+   oa_config->b_counter_regs = regs;
 
if (INTEL_GEN(perf->i915) < 8) {
if (args->n_flex_regs != 0) {
@@ -3708,17 +3706,17 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, 
void *data,
}
} else {
oa_config->flex_regs_len = args->n_flex_regs;
-   oa_config->flex_regs =
-   alloc_oa_regs(perf,
- perf->ops.is_valid_flex_reg,
- u64_to_user_ptr(args->flex_regs_ptr),
- args->n_flex_regs);
+   regs = alloc_oa_regs(perf,
+perf->ops.is_valid_flex_reg,
+u64_to_user_ptr(args->flex_regs_ptr),
+args->n_flex_regs);
 
-   if (IS_ERR(oa_config->flex_regs)) {
+   if (IS_ERR(regs)) {
DRM_DEBUG("Failed to create OA config for flex_regs\n");
-   err = PTR_ERR(oa_config->flex_regs);
+   err = PTR_ERR(regs);
goto reg_err;
}
+   oa_config->flex_regs = regs;
}
 
err = mutex_lock_interruptible(&perf->metrics_lock);
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/gem: Distinguish each object type

2019-10-13 Thread Chris Wilson
Separate each object class into a separate lock type to avoid lockdep
cross-contamination between paths (i.e. userptr!).

Signed-off-by: Chris Wilson 
Cc: Matthew Auld 
---
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c   | 3 ++-
 drivers/gpu/drm/i915/gem/i915_gem_internal.c | 3 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 5 +++--
 drivers/gpu/drm/i915/gem/i915_gem_object.h   | 3 ++-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c| 3 ++-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c   | 3 ++-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c  | 3 ++-
 drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c | 3 ++-
 drivers/gpu/drm/i915/gem/selftests/huge_pages.c  | 8 +---
 drivers/gpu/drm/i915/gvt/dmabuf.c| 3 ++-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c| 3 ++-
 drivers/gpu/drm/i915/selftests/mock_region.c | 3 ++-
 12 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 96ce95c8ac5a..eaea49d08eb5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -256,6 +256,7 @@ static const struct drm_i915_gem_object_ops 
i915_gem_object_dmabuf_ops = {
 struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 struct dma_buf *dma_buf)
 {
+   static struct lock_class_key lock_class;
struct dma_buf_attachment *attach;
struct drm_i915_gem_object *obj;
int ret;
@@ -287,7 +288,7 @@ struct drm_gem_object *i915_gem_prime_import(struct 
drm_device *dev,
}
 
drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
-   i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
+   i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class);
obj->base.import_attach = attach;
obj->base.resv = dma_buf->resv;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index 5ae694c24df4..9cfb0e41ff06 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -164,6 +164,7 @@ struct drm_i915_gem_object *
 i915_gem_object_create_internal(struct drm_i915_private *i915,
phys_addr_t size)
 {
+   static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
unsigned int cache_level;
 
@@ -178,7 +179,7 @@ i915_gem_object_create_internal(struct drm_i915_private 
*i915,
return ERR_PTR(-ENOMEM);
 
drm_gem_private_object_init(&i915->drm, &obj->base, size);
-   i915_gem_object_init(obj, &i915_gem_object_internal_ops);
+   i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class);
 
/*
 * Mark the object as volatile, such that the pages are marked as
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index dbf9be9a79f4..a50296cce0d8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -47,9 +47,10 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
 }
 
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
- const struct drm_i915_gem_object_ops *ops)
+ const struct drm_i915_gem_object_ops *ops,
+ struct lock_class_key *key)
 {
-   mutex_init(&obj->mm.lock);
+   __mutex_init(&obj->mm.lock, "obj->mm.lock", key);

spin_lock_init(&obj->vma.lock);
INIT_LIST_HEAD(&obj->vma.list);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index c5e14c9c805c..b0245585b4d5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -23,7 +23,8 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
 
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
- const struct drm_i915_gem_object_ops *ops);
+ const struct drm_i915_gem_object_ops *ops,
+ struct lock_class_key *key);
 struct drm_i915_gem_object *
 i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size);
 struct drm_i915_gem_object *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4c4954e8ce0a..f36f7d658380 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -458,6 +458,7 @@ static int create_shmem(struct drm_i915_private *i915,
 struct drm_i915_gem_object *
 i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
 {
+   static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
struct address_sp

[Intel-gfx] [PATCH] drm/i915/selftests: Fixup naked 64b divide

2019-10-13 Thread Chris Wilson
drivers/gpu/drm/i915/intel_memory_region.o: in function `igt_mock_contiguous':
drivers/gpu/drm/i915/selftests/intel_memory_region.c:166: undefined reference 
to `__umoddi3'

Reported-by: kbuild test robot 
Fixes: 2f0b97ca0211 ("drm/i915/region: support contiguous allocations")
Signed-off-by: Chris Wilson 
Cc: Matthew Auld 
---
 drivers/gpu/drm/i915/selftests/intel_memory_region.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c 
b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 4e44c81e8e5b..013dfb7683a3 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -163,7 +163,9 @@ static int igt_mock_contiguous(void *arg)
igt_object_release(obj);
 
/* Internal fragmentation should not bleed into the object size */
-   target = round_up(prandom_u32_state(&prng) % total, PAGE_SIZE);
+   target = i915_prandom_u64_state(&prng);
+   div64_u64_rem(target, total, &target);
+   target = round_up(target, PAGE_SIZE);
target = max_t(u64, PAGE_SIZE, target);
 
obj = igt_object_create(mem, &objects, target,
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/perf: Prefer using the pinned_ctx for emitting delays on config

2019-10-12 Thread Chris Wilson
When we are watching a particular context, we want the OA config to be
applied inline with that context such that it takes effect before the
next submission.

Signed-off-by: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 74f50120c151..b4e2332d35cb 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1903,6 +1903,11 @@ static int emit_oa_config(struct i915_perf_stream 
*stream,
return err;
 }
 
+static struct intel_context *oa_context(struct i915_perf_stream *stream)
+{
+   return stream->pinned_ctx ?: stream->engine->kernel_context;
+}
+
 static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 {
struct intel_uncore *uncore = stream->uncore;
@@ -1922,7 +1927,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream 
*stream)
intel_uncore_rmw(uncore, GEN6_UCGCTL1,
 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-   return emit_oa_config(stream, stream->engine->kernel_context);
+   return emit_oa_config(stream, oa_context(stream));
 }
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2286,7 +2291,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream 
*stream)
if (ret)
return ret;
 
-   return emit_oa_config(stream, stream->engine->kernel_context);
+   return emit_oa_config(stream, oa_context(stream));
 }
 
 static void gen8_disable_metric_set(struct i915_perf_stream *stream)
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/perf: Prefer using the pinned_ctx for emitting delays on config

2019-10-12 Thread Chris Wilson
When we are watching a particular context, we want the OA config to be
applied inline with that context such that it takes effect before the
next submission.

Signed-off-by: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 74f50120c151..959b6c341934 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1903,6 +1903,11 @@ static int emit_oa_config(struct i915_perf_stream 
*stream,
return err;
 }
 
+static struct intel_context *oa_context(struct i915_perf_stream *stream)
+{
+   return stream->pinned_ctx ?: stream->engine->kernel_context;
+}
+
 static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 {
struct intel_uncore *uncore = stream->uncore;
@@ -1922,7 +1927,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream 
*stream)
intel_uncore_rmw(uncore, GEN6_UCGCTL1,
 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-   return emit_oa_config(stream, stream->engine->kernel_context);
+   return emit_oa_config(stream, oa_context(stream));
 }
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2286,7 +2291,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream 
*stream)
if (ret)
return ret;
 
-   return emit_oa_config(stream, stream->engine->kernel_context);
+   return emit_oa_config(stream, oa_context(stream));
 }
 
 static void gen8_disable_metric_set(struct i915_perf_stream *stream)
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/display: Squelch kerneldoc warnings

2019-10-12 Thread Chris Wilson
Just a parameter rename,

drivers/gpu/drm/i915/display/intel_display.c:14425: warning: Function parameter 
or member '_new_plane_state' not described in 'intel_prepare_plane_fb'
drivers/gpu/drm/i915/display/intel_display.c:14425: warning: Excess function 
parameter 'new_state' description in 'intel_prepare_plane_fb'
drivers/gpu/drm/i915/display/intel_display.c:14534: warning: Function parameter 
or member '_old_plane_state' not described in 'intel_cleanup_plane_fb'
drivers/gpu/drm/i915/display/intel_display.c:14534: warning: Excess function 
parameter 'old_state' description in 'intel_cleanup_plane_fb'

Signed-off-by: Chris Wilson 
Cc: Maarten Lankhorst 
Cc: Ville Syrjälä 
Cc: Matt Roper 
---
 drivers/gpu/drm/i915/display/intel_display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index a146ec02a0c1..3cf39fc153b3 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -14410,7 +14410,7 @@ static void fb_obj_bump_render_priority(struct 
drm_i915_gem_object *obj)
 /**
  * intel_prepare_plane_fb - Prepare fb for usage on plane
  * @plane: drm plane to prepare for
- * @new_state: the plane state being prepared
+ * @_new_plane_state: the plane state being prepared
  *
  * Prepares a framebuffer for usage on a display plane.  Generally this
  * involves pinning the underlying object and updating the frontbuffer tracking
@@ -14524,7 +14524,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 /**
  * intel_cleanup_plane_fb - Cleans up an fb after plane use
  * @plane: drm plane to clean up for
- * @old_state: the state from the previous modeset
+ * @_old_plane_state: the state from the previous modeset
  *
  * Cleans up a framebuffer that has just been removed from a plane.
  */
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [CI 3/3] drm/i915/perf: execute OA configuration from command stream

2019-10-12 Thread Chris Wilson
From: Lionel Landwerlin 

We haven't run into issues with programming the global OA/NOA
registers configuration from CPU so far, but HW engineers actually
recommend doing this from the command streamer. On TGL in particular
one of the clock domains in which some of that programming goes might
not be powered when we poke things from the CPU.

Since we have a command buffer prepared for the execbuffer side of
things, we can reuse that approach here too.

This also allows us to significantly reduce the amount of time we hold
the main lock.

v2: Drop the global lock as much as possible

v3: Take global lock to pin global

v4: Create i915 request in emit_oa_config() to avoid deadlocks (Lionel)

v5: Move locking to the stream (Lionel)

v6: Move active reconfiguration request into i915_perf_stream (Lionel)

v7: Pin VMA outside request creation (Chris)
Lock VMA before move to active (Chris)

v8: Fix double free on stream->initial_oa_config_bo (Lionel)
Don't allow interruption when waiting on active config request
(Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_perf.c | 199 ---
 1 file changed, 156 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 81e8a7934001..74f50120c151 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1731,56 +1731,181 @@ static int alloc_noa_wait(struct i915_perf_stream 
*stream)
return 0;
 
 err_unpin:
-   __i915_vma_unpin(vma);
+   i915_vma_unpin_and_release(&vma, 0);
 err_unref:
i915_gem_object_put(bo);
return ret;
 }
 
-static void config_oa_regs(struct intel_uncore *uncore,
-  const struct i915_oa_reg *regs,
-  u32 n_regs)
+static u32 *write_cs_mi_lri(u32 *cs,
+   const struct i915_oa_reg *reg_data,
+   u32 n_regs)
 {
u32 i;
 
for (i = 0; i < n_regs; i++) {
-   const struct i915_oa_reg *reg = regs + i;
+   if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+   u32 n_lri = min_t(u32,
+ n_regs - i,
+ MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+   *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+   }
+   *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+   *cs++ = reg_data[i].value;
+   }
+
+   return cs;
+}
+
+static int num_lri_dwords(int num_regs)
+{
+   int count = 0;
+
+   if (num_regs > 0) {
+   count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
+   count += num_regs * 2;
+   }
+
+   return count;
+}
+
+static struct i915_oa_config_bo *
+alloc_oa_config_buffer(struct i915_perf_stream *stream,
+  struct i915_oa_config *oa_config)
+{
+   struct drm_i915_gem_object *obj;
+   struct i915_oa_config_bo *oa_bo;
+   size_t config_length = 0;
+   u32 *cs;
+   int err;
+
+   oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+   if (!oa_bo)
+   return ERR_PTR(-ENOMEM);
+
+   config_length += num_lri_dwords(oa_config->mux_regs_len);
+   config_length += num_lri_dwords(oa_config->b_counter_regs_len);
+   config_length += num_lri_dwords(oa_config->flex_regs_len);
+   config_length++; /* MI_BATCH_BUFFER_END */
+   config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
+
+   obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
+   if (IS_ERR(obj)) {
+   err = PTR_ERR(obj);
+   goto err_free;
+   }
+
+   cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   goto err_oa_bo;
+   }
 
-   intel_uncore_write(uncore, reg->addr, reg->value);
+   cs = write_cs_mi_lri(cs,
+oa_config->mux_regs,
+oa_config->mux_regs_len);
+   cs = write_cs_mi_lri(cs,
+oa_config->b_counter_regs,
+oa_config->b_counter_regs_len);
+   cs = write_cs_mi_lri(cs,
+oa_config->flex_regs,
+oa_config->flex_regs_len);
+
+   *cs++ = MI_BATCH_BUFFER_END;
+
+   i915_gem_object_flush_map(obj);
+   i915_gem_object_unpin_map(obj);
+
+   oa_bo->vma = i915_vma_instance(obj,
+  &stream->engine->gt->ggtt->vm,
+  NULL);
+   if (IS_ERR(oa_bo->vma)) {
+   err = PTR_ERR(oa_bo->vma);
+   goto err_oa_bo;
}
+
+   oa_bo->oa_config = i915_oa_config_get(oa_config);
+   ll

[Intel-gfx] [CI 1/3] drm/i915/perf: allow for CS OA configs to be created lazily

2019-10-12 Thread Chris Wilson
From: Lionel Landwerlin 

Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer be
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.

v2: No need for locking on object OA config object creation (Chris)
Flush cpu mapping of OA config (Chris)

v3: Properly deal with the perf_metric lock (Chris/Lionel)

v4: Fix oa config unref/put when not found (Lionel)

v5: Allocate BOs for configurations on the stream instead of globally
(Lionel)

v6: Fix 64bit division (Chris)

v7: Store allocated config BOs into the stream (Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson  (v4)
Signed-off-by: Chris Wilson  (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_perf.c | 107 +++
 drivers/gpu/drm/i915/i915_perf.h |  24 +
 drivers/gpu/drm/i915/i915_perf_types.h   |  23 ++--
 4 files changed, 102 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index b0227ab2fe1b..0987100c786b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,6 +138,7 @@
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
 #define   MI_LRI_CS_MMIO   (1<<19)
 #define   MI_LRI_FORCE_POSTED  (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEMMI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT(1<<22)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c4a436dfb7db..50f2f972020d 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -369,52 +369,52 @@ struct perf_open_properties {
struct intel_engine_cs *engine;
 };
 
+struct i915_oa_config_bo {
+   struct llist_node node;
+
+   struct i915_oa_config *oa_config;
+   struct i915_vma *vma;
+};
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
+   struct i915_oa_config *oa_config =
+   container_of(ref, typeof(*oa_config), ref);
+
if (!PTR_ERR(oa_config->flex_regs))
kfree(oa_config->flex_regs);
if (!PTR_ERR(oa_config->b_counter_regs))
kfree(oa_config->b_counter_regs);
if (!PTR_ERR(oa_config->mux_regs))
kfree(oa_config->mux_regs);
-   kfree(oa_config);
-}
-
-static void put_oa_config(struct i915_oa_config *oa_config)
-{
-   if (!atomic_dec_and_test(&oa_config->ref_count))
-   return;
 
-   free_oa_config(oa_config);
+   kfree_rcu(oa_config, rcu);
 }
 
-static int get_oa_config(struct i915_perf *perf,
-int metrics_set,
-struct i915_oa_config **out_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 {
-   int ret;
-
-   if (metrics_set == 1) {
-   *out_config = &perf->test_config;
-   atomic_inc(&perf->test_config.ref_count);
-   return 0;
-   }
-
-   ret = mutex_lock_interruptible(&perf->metrics_lock);
-   if (ret)
-   return ret;
+   struct i915_oa_config *oa_config;
 
-   *out_config = idr_find(&perf->metrics_idr, metrics_set);
-   if (!*out_config)
-   ret = -EINVAL;
+   rcu_read_lock();
+   if (metrics_set == 1)
+   oa_config = &perf->test_config;
else
-   atomic_inc(&(*out_config)->ref_count);
+   oa_config = idr_find(&perf->metrics_idr, metrics_set);
+   if (oa_config)
+   oa_config = i915_oa_config_get(oa_config);
+   rcu_read_unlock();
 
-   mutex_unlock(&perf->metrics_lock);
+   return oa_config;
+}
 
-   return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+   i915_oa_config_put(oa_bo->oa_config);
+   i915_vma_put(oa_bo->vma);
+   kfree(oa_bo);
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1337,6 +1337,16 @@ free_oa_buffer(struct i915_perf_stream *stream)
stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+   struct i915_oa_config_bo *oa_bo, *tmp;
+
+   i915_oa_config_put(stream->oa_confi

[Intel-gfx] [CI 2/3] drm/i915/perf: implement active wait for noa configurations

2019-10-12 Thread Chris Wilson
From: Lionel Landwerlin 

NOA configuration take some amount of time to apply. That amount of
time depends on the size of the GT. There is no documented time for
this. For example, past experimentations with powergating
configuration changes seem to indicate a 60~70us delay. We go with
500us as default for now which should be over the required amount of
time (according to HW architects).

v2: Don't forget to save/restore registers used for the wait (Chris)

v3: Name used CS_GPR registers (Chris)
Fix compile issue due to rebase (Lionel)

v4: Fix save/restore helpers (Umesh)

v5: Move noa_wait from drm_i915_private to i915_perf_stream (Lionel)

v6: Add missing struct declarations in i915_perf.h

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson  (v4)
Signed-off-by: Chris Wilson  (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |   5 +
 drivers/gpu/drm/i915/i915_debugfs.c   |  32 +++
 drivers/gpu/drm/i915/i915_perf.c  | 224 ++
 drivers/gpu/drm/i915/i915_perf_types.h|   8 +
 drivers/gpu/drm/i915/i915_reg.h   |   4 +-
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_perf.c| 216 +
 8 files changed, 492 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_perf.c

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 0987100c786b..8e63cffcabe0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -163,7 +163,8 @@
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
-#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define   MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/
 
 /*
  * 3D instructions used by the kernel
@@ -224,6 +225,7 @@
 #define   PIPE_CONTROL_CS_STALL(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE  (1<<18)
 #define   PIPE_CONTROL_MEDIA_STATE_CLEAR   (1<<16)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
 #define   PIPE_CONTROL_QW_WRITE(1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK(3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL (1<<13)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 802f516a3430..be4b263621c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -109,6 +109,11 @@ enum intel_gt_scratch_field {
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
 
+   /* 6 * 8 bytes */
+   INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+   /* 4 bytes */
+   INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index e575761550ac..a541b6ae534f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3590,6 +3590,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
i915_wedged_get, i915_wedged_set,
"%llu\n");
 
+static int
+i915_perf_noa_delay_set(void *data, u64 val)
+{
+   struct drm_i915_private *i915 = data;
+   const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+
+   /*
+* This would lead to infinite waits as we're doing timestamp
+* difference on the CS with only 32bits.
+*/
+   if (val > mul_u32_u32(U32_MAX, clk))
+   return -EINVAL;
+
+   atomic64_set(&i915->perf.noa_programming_delay, val);
+   return 0;
+}
+
+static int
+i915_perf_noa_delay_get(void *data, u64 *val)
+{
+   struct drm_i915_private *i915 = data;
+
+   *val = atomic64_read(&i915->perf.noa_programming_delay);
+   return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
+   i915_perf_noa_delay_get,
+   i915_perf_noa_delay_set,
+   "%llu\n");
+
 #define DROP_UNBOUND   BIT(0)
 #define DROP_BOUND BIT(1)
 #define DROP_RETIREBIT(2)
@@ -4345,6 +4376,7 @@ static const struct i915_debugfs_files {
const char *name;
const struct file_operations *fops;
 } i915_debugfs_files[] = {
+   {"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
	{"i915_wedged", &i915_wedged_fops},
	{"i915_cache_sharing", &i915_cache_sharing_fops},
	{"i915_gem_drop_caches", &i915_drop_caches_fops},
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/driver

[Intel-gfx] [CI] drm/i915: Mark up "sentinel" requests

2019-10-12 Thread Chris Wilson
Sometimes we want to emit a terminator request, a request that flushes
the pipeline and allows no request to come after it. This can be used
for a "preempt-to-idle" to ensure that upon processing the
context-switch to that request, all other active contexts have been
flushed.

Signed-off-by: Chris Wilson 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c |  6 +-
 drivers/gpu/drm/i915/i915_request.h | 10 --
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b7d5be275fae..16b878d35814 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1208,7 +1208,8 @@ static bool can_merge_rq(const struct i915_request *prev,
if (i915_request_completed(next))
return true;
 
-   if (unlikely(prev->flags ^ next->flags) & I915_REQUEST_NOPREEMPT)
+   if (unlikely((prev->flags ^ next->flags) &
+(I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
return false;
 
if (!can_merge_ctx(prev->hw_context, next->hw_context))
@@ -1659,6 +1660,9 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
if (last->hw_context == rq->hw_context)
goto done;
 
+   if (i915_request_has_sentinel(last))
+   goto done;
+
/*
 * If GVT overrides us we only ever submit
 * port[0], leaving port[1] empty. Note that we
diff --git a/drivers/gpu/drm/i915/i915_request.h 
b/drivers/gpu/drm/i915/i915_request.h
index 6a95242b280d..96991d64759c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -216,8 +216,9 @@ struct i915_request {
unsigned long emitted_jiffies;
 
unsigned long flags;
-#define I915_REQUEST_WAITBOOST BIT(0)
-#define I915_REQUEST_NOPREEMPT BIT(1)
+#define I915_REQUEST_WAITBOOST BIT(0)
+#define I915_REQUEST_NOPREEMPT BIT(1)
+#define I915_REQUEST_SENTINEL  BIT(2)
 
/** timeline->request entry for this request */
struct list_head link;
@@ -440,6 +441,11 @@ static inline bool i915_request_has_nopreempt(const struct 
i915_request *rq)
return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
 }
 
+static inline bool i915_request_has_sentinel(const struct i915_request *rq)
+{
+   return unlikely(rq->flags & I915_REQUEST_SENTINEL);
+}
+
 static inline struct intel_timeline *
 i915_request_timeline(struct i915_request *rq)
 {
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/selftests: Serialise write to scratch with its vma binding

2019-10-11 Thread Chris Wilson
Add the missing serialisation on the request for a write into a vma to
wait until that vma is bound before being executed by the GPU.

Signed-off-by: Chris Wilson 
Cc: Matthew Auld 
---
 drivers/gpu/drm/i915/gt/selftest_workarounds.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c 
b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 1048be646c35..dc11f7ad50a2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -786,6 +786,14 @@ static int read_whitelisted_registers(struct 
i915_gem_context *ctx,
if (IS_ERR(rq))
return PTR_ERR(rq);
 
+   i915_vma_lock(results);
+   err = i915_request_await_object(rq, results->obj, true);
+   if (err == 0)
+   err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE);
+   i915_vma_unlock(results);
+   if (err)
+   goto err_req;
+
srm = MI_STORE_REGISTER_MEM;
if (INTEL_GEN(ctx->i915) >= 8)
srm++;
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm

2019-10-11 Thread Chris Wilson
As we now have a specific engine to use OA on, exchange the top-level
runtime-pm wakeref with the engine-pm. This still results in the same
top-level runtime-pm, but with more nuances to keep the engine and its
gt awake.

Signed-off-by: Chris Wilson 
Reviewed-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c   | 8 
 drivers/gpu/drm/i915/i915_perf_types.h | 6 --
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 77c3cef64548..c4a436dfb7db 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -196,7 +196,7 @@
 #include 
 
 #include "gem/i915_gem_context.h"
-#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_lrc_reg.h"
 
@@ -1353,7 +1353,7 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
free_oa_buffer(stream);
 
intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
-   intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref);
+   intel_engine_pm_put(stream->engine);
 
if (stream->ctx)
oa_put_render_ctx_id(stream);
@@ -2218,7 +2218,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 *   In our case we are expecting that taking pm + FORCEWAKE
 *   references will effectively disable RC6.
 */
-   stream->wakeref = intel_runtime_pm_get(stream->uncore->rpm);
+   intel_engine_pm_get(stream->engine);
intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
 
ret = alloc_oa_buffer(stream);
@@ -2252,7 +2252,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
put_oa_config(stream->oa_config);
 
intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
-   intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref);
+   intel_engine_pm_put(stream->engine);
 
 err_config:
if (stream->ctx)
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h 
b/drivers/gpu/drm/i915/i915_perf_types.h
index a91ae2d1a543..eb8d1ebd5095 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -134,12 +134,6 @@ struct i915_perf_stream {
 */
struct intel_uncore *uncore;
 
-   /**
-* @wakeref: As we keep the device awake while the perf stream is
-* active, we track our runtime pm reference for later release.
-*/
-   intel_wakeref_t wakeref;
-
/**
 * @engine: Engine associated with this performance stream.
 */
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [CI 2/9] drm/i915/perf: introduce a versioning of the i915-perf uapi

2019-10-11 Thread Chris Wilson
From: Lionel Landwerlin 

Reporting this version will help application figure out what level of
the support the running kernel provides.

v2: Add i915_perf_ioctl_version() (Chris)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_getparam.c |  4 
 drivers/gpu/drm/i915/i915_perf.c | 10 ++
 drivers/gpu/drm/i915/i915_perf.h |  1 +
 include/uapi/drm/i915_drm.h  | 21 +
 4 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_getparam.c 
b/drivers/gpu/drm/i915/i915_getparam.c
index f4b3cbb1adce..ad33fbe90a28 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -5,6 +5,7 @@
 #include "gt/intel_engine_user.h"
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 
 int i915_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
@@ -156,6 +157,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_MMAP_GTT_COHERENT:
value = INTEL_INFO(i915)->has_coherent_ggtt;
break;
+   case I915_PARAM_PERF_REVISION:
+   value = i915_perf_ioctl_version();
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c4a436dfb7db..0b51ab3ab523 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3665,3 +3665,13 @@ void i915_perf_fini(struct drm_i915_private *i915)
memset(>ops, 0, sizeof(perf->ops));
perf->i915 = NULL;
 }
+
+/**
+ * i915_perf_ioctl_version - Version of the i915-perf subsystem
+ *
+ * This version number is used by userspace to detect available features.
+ */
+int i915_perf_ioctl_version(void)
+{
+   return 1;
+}
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index ff412fb0dbbf..295e33e8eef7 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -20,6 +20,7 @@ void i915_perf_init(struct drm_i915_private *i915);
 void i915_perf_fini(struct drm_i915_private *i915);
 void i915_perf_register(struct drm_i915_private *i915);
 void i915_perf_unregister(struct drm_i915_private *i915);
+int i915_perf_ioctl_version(void);
 
 int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 30c542144016..c50c712b3771 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -611,6 +611,13 @@ typedef struct drm_i915_irq_wait {
  * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
  */
 #define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
+
+/*
+ * Revision of the i915-perf uAPI. The value returned helps determine what
+ * i915-perf features are available. See drm_i915_perf_property_id.
+ */
+#define I915_PARAM_PERF_REVISION   54
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1844,23 +1851,31 @@ enum drm_i915_perf_property_id {
 * Open the stream for a specific context handle (as used with
 * execbuffer2). A stream opened for a specific context this way
 * won't typically require root privileges.
+*
+* This property is available in perf revision 1.
 */
DRM_I915_PERF_PROP_CTX_HANDLE = 1,
 
/**
 * A value of 1 requests the inclusion of raw OA unit reports as
 * part of stream samples.
+*
+* This property is available in perf revision 1.
 */
DRM_I915_PERF_PROP_SAMPLE_OA,
 
/**
 * The value specifies which set of OA unit metrics should be
 * be configured, defining the contents of any OA unit reports.
+*
+* This property is available in perf revision 1.
 */
DRM_I915_PERF_PROP_OA_METRICS_SET,
 
/**
 * The value specifies the size and layout of OA unit reports.
+*
+* This property is available in perf revision 1.
 */
DRM_I915_PERF_PROP_OA_FORMAT,
 
@@ -1870,6 +1885,8 @@ enum drm_i915_perf_property_id {
 * from this exponent as follows:
 *
 *   80ns * 2^(period_exponent + 1)
+*
+* This property is available in perf revision 1.
 */
DRM_I915_PERF_PROP_OA_EXPONENT,
 
@@ -1901,6 +1918,8 @@ struct drm_i915_perf_open_param {
  * to close and re-open a stream with the same configuration.
  *
  * It's undefined whether any pending data for the stream will be lost.
+ *
+ * This ioctl is available in perf revision 1.
  */
 #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0)
 
@@ -1908,6 +1927,8 @@ struct drm_i915_perf_open_param {
  * Disable data captu

[Intel-gfx] [CI 4/9] drm/i915: add support for perf configuration queries

2019-10-11 Thread Chris Wilson
From: Lionel Landwerlin 

Listing configurations at the moment is supported only through sysfs.
This might cause issues for applications wanting to list
configurations from a container where sysfs isn't available.

This change adds a way to query the number of configurations and their
content through the i915 query uAPI.

v2: Fix sparse warnings (Lionel)
Add support to query configuration using uuid (Lionel)

v3: Fix some inconsistency in uapi header (Lionel)
Fix unlocking when not locked issue (Lionel)
Add debug messages (Lionel)

v4: Fix missing unlock (Dan)

v5: Drop lock when copying config content to userspace (Chris)

v6: Drop lock when copying config list to userspace (Chris)
Fix deadlock when calling i915_perf_get_oa_config() under
perf.metrics_lock (Lionel)
Add i915_oa_config_get() (Chris)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_perf.c  |   3 +-
 drivers/gpu/drm/i915/i915_query.c | 295 ++
 include/uapi/drm/i915_drm.h   |  62 ++-
 3 files changed, 357 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5fa0df46fcc3..7d7baee7febe 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3473,8 +3473,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, 
void *data,
 
GEM_BUG_ON(*arg != oa_config->id);
 
-   sysfs_remove_group(perf->metrics_kobj,
-  &oa_config->sysfs_metric);
+   sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
 
idr_remove(&perf->metrics_idr, *arg);
 
diff --git a/drivers/gpu/drm/i915/i915_query.c 
b/drivers/gpu/drm/i915/i915_query.c
index abac5042da2b..6a68ecc7bb5f 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -7,6 +7,7 @@
 #include 
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 #include "i915_query.h"
 #include 
 
@@ -140,10 +141,304 @@ query_engine_info(struct drm_i915_private *i915,
return len;
 }
 
+static int can_copy_perf_config_registers_or_number(u32 user_n_regs,
+   u64 user_regs_ptr,
+   u32 kernel_n_regs)
+{
+   /*
+* We'll just put the number of registers, and won't copy the
+* register.
+*/
+   if (user_n_regs == 0)
+   return 0;
+
+   if (user_n_regs < kernel_n_regs)
+   return -EINVAL;
+
+   if (!access_ok(u64_to_user_ptr(user_regs_ptr),
+  2 * sizeof(u32) * kernel_n_regs))
+   return -EFAULT;
+
+   return 0;
+}
+
+static int copy_perf_config_registers_or_number(const struct i915_oa_reg 
*kernel_regs,
+   u32 kernel_n_regs,
+   u64 user_regs_ptr,
+   u32 *user_n_regs)
+{
+   u32 r;
+
+   if (*user_n_regs == 0) {
+   *user_n_regs = kernel_n_regs;
+   return 0;
+   }
+
+   *user_n_regs = kernel_n_regs;
+
+   for (r = 0; r < kernel_n_regs; r++) {
+   u32 __user *user_reg_ptr =
+   u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2);
+   u32 __user *user_val_ptr =
+   u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 +
+   sizeof(u32));
+   int ret;
+
+   ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr),
+user_reg_ptr);
+   if (ret)
+   return -EFAULT;
+
+   ret = __put_user(kernel_regs[r].value, user_val_ptr);
+   if (ret)
+   return -EFAULT;
+   }
+
+   return 0;
+}
+
+static int query_perf_config_data(struct drm_i915_private *i915,
+ struct drm_i915_query_item *query_item,
+ bool use_uuid)
+{
+   struct drm_i915_query_perf_config __user *user_query_config_ptr =
+   u64_to_user_ptr(query_item->data_ptr);
+   struct drm_i915_perf_oa_config __user *user_config_ptr =
+   u64_to_user_ptr(query_item->data_ptr +
+   sizeof(struct drm_i915_query_perf_config));
+   struct drm_i915_perf_oa_config user_config;
+   struct i915_perf *perf = >perf;
+   struct i915_oa_config *oa_config;
+   char uuid[UUID_STRING_LEN + 1];
+   u64 config_id;
+   u32 flags, total_size;
+   int ret;
+
+   if (!perf->i915)
+   return -ENODEV;
+
+   total_size =
+   sizeof(struct drm_i915_query_perf_config) +
+   sizeof(struct drm_i915_perf_oa_config);
+
+   

[Intel-gfx] [CI 9/9] drm/i915/execlists: Prevent merging requests with conflicting flags

2019-10-11 Thread Chris Wilson
We set out-of-bound parameters inside the i915_requests.flags field,
such as disabling preemption or marking the end-of-context. We should
not coalesce consecutive requests if they have differing instructions
as we only inspect the last active request in a context. Thus if we
allow a later request to be merged into the same execution context, it
will mask any of the earlier flags.

References: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption 
on a request")
Signed-off-by: Chris Wilson 
Cc: Lionel Landwerlin 
Reviewed-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 1f4772329021..b7d5be275fae 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1208,6 +1208,9 @@ static bool can_merge_rq(const struct i915_request *prev,
if (i915_request_completed(next))
return true;
 
+   if (unlikely(prev->flags ^ next->flags) & I915_REQUEST_NOPREEMPT)
+   return false;
+
if (!can_merge_ctx(prev->hw_context, next->hw_context))
return false;
 
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [CI 7/9] drm/i915/perf: Allow dynamic reconfiguration of the OA stream

2019-10-11 Thread Chris Wilson
Introduce a new perf_ioctl command to change the OA configuration of the
active stream. This allows the OA stream to be reconfigured between
batch buffers, giving greater flexibility in sampling. We inject a
request into the OA context to reconfigure the stream asynchronously on
the GPU in between and ordered with execbuffer calls.

Original patch for dynamic reconfiguration by Lionel Landwerlin.

Link: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932
Signed-off-by: Chris Wilson 
Reviewed-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c | 49 +++-
 include/uapi/drm/i915_drm.h  | 13 +
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c2431b5a1f55..5daaf8d0bdc3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2860,6 +2860,43 @@ static void i915_perf_disable_locked(struct 
i915_perf_stream *stream)
stream->ops->disable(stream);
 }
 
+static long i915_perf_config_locked(struct i915_perf_stream *stream,
+   unsigned long metrics_set)
+{
+   struct i915_oa_config *config;
+   long ret = stream->oa_config->id;
+
+   config = i915_perf_get_oa_config(stream->perf, metrics_set);
+   if (!config)
+   return -EINVAL;
+
+   if (config != stream->oa_config) {
+   struct intel_context *ce;
+   int err;
+
+   /*
+* If OA is bound to a specific context, emit the
+* reconfiguration inline from that context. The update
+* will then be ordered with respect to submission on that
+* context.
+*
+* When set globally, we use a low priority kernel context,
+* so it will effectively take effect when idle.
+*/
+   ce = stream->pinned_ctx ?: stream->engine->kernel_context;
+
+   err = emit_oa_config(stream, ce);
+   if (err == 0)
+   config = xchg(&stream->oa_config, config);
+   else
+   ret = err;
+   }
+
+   i915_oa_config_put(config);
+
+   return ret;
+}
+
 /**
  * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
  * @stream: An i915 perf stream
@@ -2883,6 +2920,8 @@ static long i915_perf_ioctl_locked(struct 
i915_perf_stream *stream,
case I915_PERF_IOCTL_DISABLE:
i915_perf_disable_locked(stream);
return 0;
+   case I915_PERF_IOCTL_CONFIG:
+   return i915_perf_config_locked(stream, arg);
}
 
return -EINVAL;
@@ -4020,7 +4059,15 @@ void i915_perf_fini(struct drm_i915_private *i915)
  */
 int i915_perf_ioctl_version(void)
 {
-   return 1;
+   /*
+* 1: Initial version
+*   I915_PERF_IOCTL_ENABLE
+*   I915_PERF_IOCTL_DISABLE
+*
+* 2: Added runtime modification of OA config.
+*   I915_PERF_IOCTL_CONFIG
+*/
+   return 2;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0c7b2815fbf1..b008ce8b4e6f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1932,6 +1932,19 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_DISABLE_IO('i', 0x1)
 
+/**
+ * Change metrics_set captured by a stream.
+ *
+ * If the stream is bound to a specific context, the configuration change
+ * will be performed inline with that context such that it takes effect before
+ * the next execbuf submission.
+ *
+ * Returns the previously bound metrics set id, or a negative error code.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2)
+
 /**
  * Common to all i915 perf records
  */
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [CI 6/9] drm/i915/perf: execute OA configuration from command stream

2019-10-11 Thread Chris Wilson
From: Lionel Landwerlin 

We haven't run into issues with programming the global OA/NOA
registers configuration from CPU so far, but HW engineers actually
recommend doing this from the command streamer. On TGL in particular
one of the clock domains into which some of that programming goes might
not be powered when we poke things from the CPU.

Since we have a command buffer prepared for the execbuffer side of
things, we can reuse that approach here too.

This also allows us to significantly reduce the amount of time we hold
the main lock.

v2: Drop the global lock as much as possible

v3: Take global lock to pin global

v4: Create i915 request in emit_oa_config() to avoid deadlocks (Lionel)

v5: Move locking to the stream (Lionel)

v6: Move active reconfiguration request into i915_perf_stream (Lionel)

v7: Pin VMA outside request creation (Chris)
Lock VMA before move to active (Chris)

v8: Fix double free on stream->initial_oa_config_bo (Lionel)
Don't allow interruption when waiting on active config request
(Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_perf.c | 199 ---
 1 file changed, 156 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index abb7a70e17ec..c2431b5a1f55 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1731,56 +1731,181 @@ static int alloc_noa_wait(struct i915_perf_stream 
*stream)
return 0;
 
 err_unpin:
-   __i915_vma_unpin(vma);
+   i915_vma_unpin_and_release(, 0);
 err_unref:
i915_gem_object_put(bo);
return ret;
 }
 
-static void config_oa_regs(struct intel_uncore *uncore,
-  const struct i915_oa_reg *regs,
-  u32 n_regs)
+static u32 *write_cs_mi_lri(u32 *cs,
+   const struct i915_oa_reg *reg_data,
+   u32 n_regs)
 {
u32 i;
 
for (i = 0; i < n_regs; i++) {
-   const struct i915_oa_reg *reg = regs + i;
+   if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+   u32 n_lri = min_t(u32,
+ n_regs - i,
+ MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+   *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+   }
+   *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+   *cs++ = reg_data[i].value;
+   }
+
+   return cs;
+}
+
+static int num_lri_dwords(int num_regs)
+{
+   int count = 0;
+
+   if (num_regs > 0) {
+   count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
+   count += num_regs * 2;
+   }
+
+   return count;
+}
+
+static struct i915_oa_config_bo *
+alloc_oa_config_buffer(struct i915_perf_stream *stream,
+  struct i915_oa_config *oa_config)
+{
+   struct drm_i915_gem_object *obj;
+   struct i915_oa_config_bo *oa_bo;
+   size_t config_length = 0;
+   u32 *cs;
+   int err;
+
+   oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+   if (!oa_bo)
+   return ERR_PTR(-ENOMEM);
+
+   config_length += num_lri_dwords(oa_config->mux_regs_len);
+   config_length += num_lri_dwords(oa_config->b_counter_regs_len);
+   config_length += num_lri_dwords(oa_config->flex_regs_len);
+   config_length++; /* MI_BATCH_BUFFER_END */
+   config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
+
+   obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
+   if (IS_ERR(obj)) {
+   err = PTR_ERR(obj);
+   goto err_free;
+   }
+
+   cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   goto err_oa_bo;
+   }
 
-   intel_uncore_write(uncore, reg->addr, reg->value);
+   cs = write_cs_mi_lri(cs,
+oa_config->mux_regs,
+oa_config->mux_regs_len);
+   cs = write_cs_mi_lri(cs,
+oa_config->b_counter_regs,
+oa_config->b_counter_regs_len);
+   cs = write_cs_mi_lri(cs,
+oa_config->flex_regs,
+oa_config->flex_regs_len);
+
+   *cs++ = MI_BATCH_BUFFER_END;
+
+   i915_gem_object_flush_map(obj);
+   i915_gem_object_unpin_map(obj);
+
+   oa_bo->vma = i915_vma_instance(obj,
+  >engine->gt->ggtt->vm,
+  NULL);
+   if (IS_ERR(oa_bo->vma)) {
+   err = PTR_ERR(oa_bo->vma);
+   goto err_oa_bo;
}
+
+   oa_bo->oa_config = i915_oa_config_get(oa_config);
+   ll

[Intel-gfx] [CI 5/9] drm/i915/perf: implement active wait for noa configurations

2019-10-11 Thread Chris Wilson
From: Lionel Landwerlin 

A NOA configuration takes some amount of time to apply. That amount of
time depends on the size of the GT. There is no documented time for
this. For example, past experimentations with powergating
configuration changes seem to indicate a 60~70us delay. We go with
500us as default for now which should be over the required amount of
time (according to HW architects).

v2: Don't forget to save/restore registers used for the wait (Chris)

v3: Name used CS_GPR registers (Chris)
Fix compile issue due to rebase (Lionel)

v4: Fix save/restore helpers (Umesh)

v5: Move noa_wait from drm_i915_private to i915_perf_stream (Lionel)

v6: Add missing struct declarations in i915_perf.h

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson  (v4)
Signed-off-by: Chris Wilson  (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |   5 +
 drivers/gpu/drm/i915/i915_debugfs.c   |  32 +++
 drivers/gpu/drm/i915/i915_perf.c  | 224 ++
 drivers/gpu/drm/i915/i915_perf_types.h|   8 +
 drivers/gpu/drm/i915/i915_reg.h   |   4 +-
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_perf.c| 216 +
 8 files changed, 492 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_perf.c

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 0987100c786b..8e63cffcabe0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -163,7 +163,8 @@
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
-#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define   MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/
 
 /*
  * 3D instructions used by the kernel
@@ -224,6 +225,7 @@
 #define   PIPE_CONTROL_CS_STALL(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE  (1<<18)
 #define   PIPE_CONTROL_MEDIA_STATE_CLEAR   (1<<16)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
 #define   PIPE_CONTROL_QW_WRITE(1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK(3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL (1<<13)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 802f516a3430..be4b263621c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -109,6 +109,11 @@ enum intel_gt_scratch_field {
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
 
+   /* 6 * 8 bytes */
+   INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+   /* 4 bytes */
+   INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index e575761550ac..a541b6ae534f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3590,6 +3590,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
i915_wedged_get, i915_wedged_set,
"%llu\n");
 
+static int
+i915_perf_noa_delay_set(void *data, u64 val)
+{
+   struct drm_i915_private *i915 = data;
+   const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+
+   /*
+* This would lead to infinite waits as we're doing timestamp
+* difference on the CS with only 32bits.
+*/
+   if (val > mul_u32_u32(U32_MAX, clk))
+   return -EINVAL;
+
+   atomic64_set(>perf.noa_programming_delay, val);
+   return 0;
+}
+
+static int
+i915_perf_noa_delay_get(void *data, u64 *val)
+{
+   struct drm_i915_private *i915 = data;
+
+   *val = atomic64_read(>perf.noa_programming_delay);
+   return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
+   i915_perf_noa_delay_get,
+   i915_perf_noa_delay_set,
+   "%llu\n");
+
 #define DROP_UNBOUND   BIT(0)
 #define DROP_BOUND BIT(1)
 #define DROP_RETIREBIT(2)
@@ -4345,6 +4376,7 @@ static const struct i915_debugfs_files {
const char *name;
const struct file_operations *fops;
 } i915_debugfs_files[] = {
+   {"i915_perf_noa_delay", _perf_noa_delay_fops},
{"i915_wedged", _wedged_fops},
{"i915_cache_sharing", _cache_sharing_fops},
{"i915_gem_drop_caches", _drop_caches_fops},
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/driver

[Intel-gfx] [CI 8/9] drm/i915/perf: allow holding preemption on filtered ctx

2019-10-11 Thread Chris Wilson
From: Lionel Landwerlin 

We would like to make use of perf in Vulkan. The Vulkan API is much
lower level than OpenGL, with applications directly exposed to the
concept of command buffers (pretty much equivalent to our batch
buffers). In Vulkan, queries are always limited in scope to a command
buffer. In OpenGL, the lack of command buffer concept meant that
queries' duration could span multiple command buffers.

With that restriction gone in Vulkan, we would like to simplify
measuring performance just by measuring the deltas between the counter
snapshots written by 2 MI_RECORD_PERF_COUNT commands, rather than the
more complex scheme we currently have in the GL driver, using 2
MI_RECORD_PERF_COUNT commands and doing some post processing on the
stream of OA reports, coming from the global OA buffer, to remove any
unrelated deltas in between the 2 MI_RECORD_PERF_COUNT.

Disabling preemption only applies to the single context for which we
want to query performance counters, and is considered a privileged
operation, by default protected by CAP_SYS_ADMIN. It is possible to
enable it for a normal user by disabling the paranoid stream setting.

v2: Store preemption setting in intel_context (Chris)

v3: Use priorities to avoid preemption rather than the HW mechanism

v4: Just modify the port priority reporting function

v5: Add nopreempt flag on gem context and always flag requests
appropriately, regardless of OA reconfiguration.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.h   | 18 ++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  3 ++
 drivers/gpu/drm/i915/i915_perf.c  | 34 +--
 drivers/gpu/drm/i915/i915_perf_types.h|  8 +
 include/uapi/drm/i915_drm.h   | 11 ++
 6 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 9234586830d1..cfe80590f0ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -114,6 +114,24 @@ i915_gem_context_clear_user_engines(struct 
i915_gem_context *ctx)
clear_bit(CONTEXT_USER_ENGINES, >flags);
 }
 
+static inline bool
+i915_gem_context_nopreempt(const struct i915_gem_context *ctx)
+{
+   return test_bit(CONTEXT_NOPREEMPT, >flags);
+}
+
+static inline void
+i915_gem_context_set_nopreempt(struct i915_gem_context *ctx)
+{
+   set_bit(CONTEXT_NOPREEMPT, >flags);
+}
+
+static inline void
+i915_gem_context_clear_nopreempt(struct i915_gem_context *ctx)
+{
+   clear_bit(CONTEXT_NOPREEMPT, >flags);
+}
+
 static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 {
return !ctx->file_priv;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index ab8e1367dfc8..fe97b8ba4fda 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -146,6 +146,7 @@ struct i915_gem_context {
 #define CONTEXT_CLOSED 1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION2
 #define CONTEXT_USER_ENGINES   3
+#define CONTEXT_NOPREEMPT  4
 
struct mutex mutex;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 98816c35ffc3..e96901888323 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2077,6 +2077,9 @@ static int eb_submit(struct i915_execbuffer *eb)
if (err)
return err;
 
+   if (i915_gem_context_nopreempt(eb->gem_context))
+   eb->request->flags |= I915_REQUEST_NOPREEMPT;
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5daaf8d0bdc3..307116078e16 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -344,6 +344,8 @@ static const struct i915_oa_format 
gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
  * struct perf_open_properties - for validated properties given to open a 
stream
  * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
  * @single_context: Whether a single or all gpu contexts should be monitored
+ * @hold_preemption: Whether the preemption is disabled for the filtered
+ *   context
  * @ctx_handle: A gem ctx handle for use with @single_context
  * @metrics_set: An ID for an OA unit metric set advertised via sysfs
  * @oa_format: An OA unit HW report format
@@ -359,6 +361,7 @@ struct perf_open_properties {
u32 sample_flags;
 
u64 single_context:1;
+   u64 hold_preemption:1;
u64 ctx_handle;
 
/* OA sampling state */
@@ -2512,6 +2515,8 @@ static int i9

[Intel-gfx] [CI 3/9] drm/i915/perf: allow for CS OA configs to be created lazily

2019-10-11 Thread Chris Wilson
From: Lionel Landwerlin 

Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer will be
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.

v2: No need for locking on object OA config object creation (Chris)
Flush cpu mapping of OA config (Chris)

v3: Properly deal with the perf_metric lock (Chris/Lionel)

v4: Fix oa config unref/put when not found (Lionel)

v5: Allocate BOs for configurations on the stream instead of globally
(Lionel)

v6: Fix 64bit division (Chris)

v7: Store allocated config BOs into the stream (Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson  (v4)
Signed-off-by: Chris Wilson  (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_perf.c | 107 +++
 drivers/gpu/drm/i915/i915_perf.h |  24 +
 drivers/gpu/drm/i915/i915_perf_types.h   |  23 ++--
 4 files changed, 102 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index b0227ab2fe1b..0987100c786b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,6 +138,7 @@
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
 #define   MI_LRI_CS_MMIO   (1<<19)
 #define   MI_LRI_FORCE_POSTED  (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEMMI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT(1<<22)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0b51ab3ab523..5fa0df46fcc3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -369,52 +369,52 @@ struct perf_open_properties {
struct intel_engine_cs *engine;
 };
 
+struct i915_oa_config_bo {
+   struct llist_node node;
+
+   struct i915_oa_config *oa_config;
+   struct i915_vma *vma;
+};
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
+   struct i915_oa_config *oa_config =
+   container_of(ref, typeof(*oa_config), ref);
+
if (!PTR_ERR(oa_config->flex_regs))
kfree(oa_config->flex_regs);
if (!PTR_ERR(oa_config->b_counter_regs))
kfree(oa_config->b_counter_regs);
if (!PTR_ERR(oa_config->mux_regs))
kfree(oa_config->mux_regs);
-   kfree(oa_config);
-}
-
-static void put_oa_config(struct i915_oa_config *oa_config)
-{
-   if (!atomic_dec_and_test(_config->ref_count))
-   return;
 
-   free_oa_config(oa_config);
+   kfree_rcu(oa_config, rcu);
 }
 
-static int get_oa_config(struct i915_perf *perf,
-int metrics_set,
-struct i915_oa_config **out_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 {
-   int ret;
-
-   if (metrics_set == 1) {
-   *out_config = >test_config;
-   atomic_inc(>test_config.ref_count);
-   return 0;
-   }
-
-   ret = mutex_lock_interruptible(>metrics_lock);
-   if (ret)
-   return ret;
+   struct i915_oa_config *oa_config;
 
-   *out_config = idr_find(>metrics_idr, metrics_set);
-   if (!*out_config)
-   ret = -EINVAL;
+   rcu_read_lock();
+   if (metrics_set == 1)
+   oa_config = >test_config;
else
-   atomic_inc(&(*out_config)->ref_count);
+   oa_config = idr_find(>metrics_idr, metrics_set);
+   if (oa_config)
+   oa_config = i915_oa_config_get(oa_config);
+   rcu_read_unlock();
 
-   mutex_unlock(>metrics_lock);
+   return oa_config;
+}
 
-   return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+   i915_oa_config_put(oa_bo->oa_config);
+   i915_vma_put(oa_bo->vma);
+   kfree(oa_bo);
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1337,6 +1337,16 @@ free_oa_buffer(struct i915_perf_stream *stream)
stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+   struct i915_oa_config_bo *oa_bo, *tmp;
+
+   i915_oa_config_put(stream->oa_confi

[Intel-gfx] [PATCH i-g-t] debugfs: Define DROP_RCU

2019-10-11 Thread Chris Wilson
Corresponding kernel commit 54895010a893 ("drm/i915: Add an rcu_barrier
option to i915_drop_caches")

Signed-off-by: Chris Wilson 
Cc: Matthew Auld 
---
 lib/igt_debugfs.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/lib/igt_debugfs.h b/lib/igt_debugfs.h
index 36b638177..a56f09dd2 100644
--- a/lib/igt_debugfs.h
+++ b/lib/igt_debugfs.h
@@ -172,6 +172,13 @@ void igt_require_hpd_storm_ctl(int fd);
  * Reset the global request seqno counter back to 0
  */
 #define DROP_RESET_SEQNO 0x100
+/**
+ * DROP_RCU:
+ *
+ * Performs rcu_barrier() and waits for an RCU grace period to complete,
+ * which will flush any RCU callbacks and deferred tasks.
+ */
+#define DROP_RCU 0x200
 /**
  * DROP_ALL:
  *
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 3/3] drm/i915/tgl: Add extra hdc flush workaround

2019-10-11 Thread Chris Wilson
Quoting Mika Kuoppala (2019-10-11 14:39:11)
> In order to ensure constant caches are invalidated
> properly with a0, we need extra hdc flush after invalidation.
> 
> References: HSDES#1604544889
> Signed-off-by: Mika Kuoppala 
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 18 ++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 967e5b2e5e80..8db86772d8d4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3236,6 +3236,24 @@ static int gen12_emit_flush_render(struct i915_request 
> *request,
>  
> *cs++ = preparser_disable(false);
> intel_ring_advance(request, cs);
> +
> +   /*
> +* Workaround constant cache invalidation issue
> +* for tgl:a0, #1604544889

Do we have IS_TGL_REVID() yet? They should act as a reminder to remove
them later.

> +*/
> +   flags = 0;
> +   flags |= PIPE_CONTROL_CS_STALL;
> +   flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
> +
> +   flags |= PIPE_CONTROL_STORE_DATA_INDEX;
> +   flags |= PIPE_CONTROL_QW_WRITE;

Acked-by: Chris Wilson 
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 2/3] drm/i915/tgl: Add HDC Pipeline Flush

2019-10-11 Thread Chris Wilson
Quoting Mika Kuoppala (2019-10-11 14:39:10)
> Add hdc pipeline flush to ensure memory state is coherent
> in L3 when we are done.
> 
> Signed-off-by: Mika Kuoppala 
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 +
>  drivers/gpu/drm/i915/gt/intel_lrc.c  | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index 8c8e6bf824a9..696b6495b0da 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -232,6 +232,7 @@
>  #define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE(1<<10) /* 
> GM45+ only */
>  #define   PIPE_CONTROL_L3_RO_CACHE_INVALIDATE  (1<<10) /* gen12 */
>  #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE  (1<<9)
> +#define   PIPE_CONTROL_HDC_PIPELINE_FLUSH  (1<<9)  /* gen 12 */

Ack. Just need to tweak usage.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 1/3] drm/i915/tgl: Include ro parts of l3 to invalidate

2019-10-11 Thread Chris Wilson
Quoting Mika Kuoppala (2019-10-11 14:39:09)
> Aim for completeness and invalidate also the ro parts
> in l3 cache. This might allow to get rid of the preparser
> disable/enable workaround on invalidation path.
> 
> Cc: Chris Wilson 
> Signed-off-by: Mika Kuoppala 
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 +
>  drivers/gpu/drm/i915/gt/intel_lrc.c  | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index b0227ab2fe1b..8c8e6bf824a9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -230,6 +230,7 @@
>  #define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH   (1<<12) /* gen6+ */
>  #define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE(1<<11) /* MBZ on ILK 
> */
>  #define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE(1<<10) /* 
> GM45+ only */
> +#define   PIPE_CONTROL_L3_RO_CACHE_INVALIDATE  (1<<10) /* gen12 */

Ack.

>  #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE  (1<<9)
>  #define   PIPE_CONTROL_NOTIFY  (1<<8)
>  #define   PIPE_CONTROL_FLUSH_ENABLE(1<<7) /* gen7+ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index b00499cc7586..c6fbc723566f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3213,6 +3213,7 @@ static int gen12_emit_flush_render(struct i915_request 
> *request,
> flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
> flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
> flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
> +   flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
>  
> flags |= PIPE_CONTROL_STORE_DATA_INDEX;
> flags |= PIPE_CONTROL_QW_WRITE;

Reviewed-by: Chris Wilson 
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 2/3] drm/i915/tgl: Add HDC Pipeline Flush

2019-10-11 Thread Chris Wilson
Quoting Mika Kuoppala (2019-10-11 14:39:10)
> Add hdc pipeline flush to ensure memory state is coherent
> in L3 when we are done.
> 
> Signed-off-by: Mika Kuoppala 
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 +
>  drivers/gpu/drm/i915/gt/intel_lrc.c  | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index 8c8e6bf824a9..696b6495b0da 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -232,6 +232,7 @@
>  #define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE(1<<10) /* 
> GM45+ only */
>  #define   PIPE_CONTROL_L3_RO_CACHE_INVALIDATE  (1<<10) /* gen12 */
>  #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE  (1<<9)
> +#define   PIPE_CONTROL_HDC_PIPELINE_FLUSH  (1<<9)  /* gen 12 */
>  #define   PIPE_CONTROL_NOTIFY  (1<<8)
>  #define   PIPE_CONTROL_FLUSH_ENABLE(1<<7) /* gen7+ */
>  #define   PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index c6fbc723566f..967e5b2e5e80 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3188,6 +3188,7 @@ static int gen12_emit_flush_render(struct i915_request 
> *request,
> flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
> flags |= PIPE_CONTROL_FLUSH_ENABLE;
> +   flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;

Adding a flush here has no effect, see gen12_fini_breadcrumbs_rcs.

>  
> flags |= PIPE_CONTROL_QW_WRITE;
> -- 
> 2.17.1
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915: Add an rcu_barrier option to i915_drop_caches

2019-10-11 Thread Chris Wilson
Sometimes a test has to wait for RCU to complete a grace period and
perform its callbacks, for example waiting for a close(fd) to actually
perform the fput(filp) and so trigger all the callbacks such as closing
GEM contexts. There is no trivial means of triggering an RCU barrier
from userspace, so add one for our convenience in
debugfs/i915_drop_caches

Signed-off-by: Chris Wilson 
Cc: Matthew Auld 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 29c918a1580e..c7fc402bd8f3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3513,6 +3513,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
 #define DROP_IDLE  BIT(6)
 #define DROP_RESET_ACTIVE  BIT(7)
 #define DROP_RESET_SEQNO   BIT(8)
+#define DROP_RCU   BIT(9)
 #define DROP_ALL (DROP_UNBOUND | \
  DROP_BOUND| \
  DROP_RETIRE   | \
@@ -3521,7 +3522,8 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
  DROP_SHRINK_ALL |\
  DROP_IDLE | \
  DROP_RESET_ACTIVE | \
- DROP_RESET_SEQNO)
+ DROP_RESET_SEQNO | \
+ DROP_RCU)
 static int
 i915_drop_caches_get(void *data, u64 *val)
 {
@@ -3573,6 +3575,9 @@ i915_drop_caches_set(void *data, u64 val)
i915_gem_shrink_all(i915);
fs_reclaim_release(GFP_KERNEL);
 
+   if (val & DROP_RCU)
+   rcu_barrier();
+
if (val & DROP_FREED)
i915_gem_drain_freed_objects(i915);
 
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 08/10] drm/i915: Cancel non-persistent contexts on close

2019-10-11 Thread Chris Wilson
Quoting Chris Wilson (2019-10-11 15:22:17)
> Quoting Tvrtko Ursulin (2019-10-11 14:55:00)
> > 
> > On 10/10/2019 08:14, Chris Wilson wrote:
> > > + if (engine)
> > > + active |= engine->mask;
> > > +
> > > + dma_fence_put(fence);
> > > + }
> > > +
> > > + /*
> > > +  * Send a "high priority pulse" down the engine to cause the
> > > +  * current request to be momentarily preempted. (If it fails to
> > > +  * be preempted, it will be reset). As we have marked our context
> > > +  * as banned, any incomplete request, including any running, will
> > > +  * be skipped following the preemption.
> > > +  */
> > > + reset = 0;
> > > + for_each_engine_masked(engine, gt->i915, active, tmp)
> > > + if (intel_engine_pulse(engine))
> > > + reset |= engine->mask;
> > 
> > What if we were able to send a pulse, but the hog cannot be preempted 
> > and hangcheck is obviously disabled - who will do the reset?
> 
> Hmm, the idea is that forced-preemption causes the reset.
> (See igt/gem_ctx_persistence/hostile)
> 
> However, if we give the sysadmin the means to disable force-preemption,
> we just gave them another shovel to dig a hole with.
> 
> A last resort would be another timer here to ensure the context was
> terminated.

That does not cut it, as we are only looking at it from the pov of the
context being guilty and not the victim. So the answer remains forced
preemption, and a backdoor if that is disabled.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915: Expose engine properties via sysfs

2019-10-11 Thread Chris Wilson
Quoting Summers, Stuart (2019-10-11 16:14:07)
> On Fri, 2019-10-11 at 12:36 +0100, Chris Wilson wrote:
> > +void intel_engines_add_sysfs(struct drm_i915_private *i915)
> > +{
> > + static const struct attribute *files[] = {
> > + _attr.attr,
> > + _attr.attr,
> > + _attr.attr,
> > + _attr.attr,
> > + NULL
> > + };
> > +
> > + struct device *kdev = i915->drm.primary->kdev;
> > + struct intel_engine_cs *engine;
> > + struct kobject *dir;
> > +
> > + dir = kobject_create_and_add("engine", >kobj);
> > + if (!dir)
> > + return;
> > +
> > + for_each_uabi_engine(engine, i915) {
> > + struct kobject *kobj;
> > +
> > + kobj = kobj_engine(dir, engine);
> > + if (!kobj)
> > + goto err_engine;
> > +
> > + if (sysfs_create_files(kobj, files))
> > + goto err_engine;
> > +
> > + if (0) {
> > +err_engine:
> 
> Can you explain why we need this goto/if 0 over a simple print/break
> under the if(sysfs_create_files()) call above? At a glance this just
> seems overly complicated.

There's more to come.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 10/10] drm/i915: Flush idle barriers when waiting

2019-10-11 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-11 15:56:35)
> 
> On 10/10/2019 08:14, Chris Wilson wrote:
> > If we do find ourselves with an idle barrier inside our active while
> > waiting, attempt to flush it by emitting a pulse using the kernel
> > context.
> 
> The point of this one completely escapes me at the moment. Idle barriers 
> are kept in there to be consumed by the engine_pm parking, so if any 
> random waiter finds some (there will always be some, as long as the 
> engine executed some user context, right?),

Not any random waiter; the waiter has to be waiting on a context that
was active and so setup a barrier.

> why would it want to handle 
> them? Again just to use the opportunity for some house keeping? But what 
> if the system is otherwise quite busy and a low-priority client just 
> happens to want to wait on something silly?

There's no guarantee that it will ever be flushed. So why wouldn't we
use a low priority request to give a semblance of forward progress and
give a guarantee that the wait will complete.

It's a hypothetical point, there are no waiters that need to wait upon
their own barriers at present. We are just completing the picture for
idle barrier tracking.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 09/10] drm/i915: Replace hangcheck by heartbeats

2019-10-11 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-11 15:24:21)
> 
> On 10/10/2019 08:14, Chris Wilson wrote:
> > +config DRM_I915_HEARTBEAT_INTERVAL
> > + int "Interval between heartbeat pulses (ms)"
> > + default 2500 # milliseconds
> > + help
> > +   While active the driver uses a periodic request, a heartbeat, to
> > +   check the wellness of the GPU and to regularly flush state changes
> > +   (idle barriers).
> 
> Should this be somehow reworded to be more end user friendly? My idea, 
> may need to be corrected for bad English:

End user friendly. Sure, but that means I didn't hide this well enough
;)
 
> The driver sends a periodic heartbeat down all active GT engines to 
> check the health of the GPU and undertake regular house-keeping of 
> internal driver state.
> 
> Main points from the user perspective: "request" - whaat? "idle 
> barriers" - ditto. "Wellness" - a bit unusual in this context, no?

> > +static void heartbeat(struct work_struct *wrk)
> > +{
> > + struct i915_sched_attr attr = {
> > + .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
> 
> You were saying it's better to start from zero, right?

The first bump. Starting at lowest, means run when first idle. Then we
jump to 0 and be scheduled like any other normal user.

> > + };
> > + struct intel_engine_cs *engine =
> > + container_of(wrk, typeof(*engine), heartbeat.work.work);
> > + struct intel_context *ce = engine->kernel_context;
> > + struct i915_request *rq;
> > +
> > + if (!intel_engine_pm_get_if_awake(engine))
> > + return;
> > +
> > + rq = engine->heartbeat.systole;
> > + if (rq && i915_request_completed(rq)) {
> > + i915_request_put(rq);
> > + engine->heartbeat.systole = NULL;
> > + }
> > +
> > + if (intel_gt_is_wedged(engine->gt))
> > + goto out;
> > +
> > + if (engine->heartbeat.systole) {
> > + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
> > + struct drm_printer p = drm_debug_printer(__func__);
> > +
> > + intel_engine_dump(engine, ,
> > +   "%s heartbeat not ticking\n",
> > +   engine->name);
> 
> This could perhaps be better only when we have reached a higher priority 
> attempt. Okay it's under DEBUG_GEM but still, not sure there is value in 
> being so panicky if for any reason preemption does not work. Heartbeat 
> does not depend on preemption as far as I could spot, right?

The challenge is evident by the else path where we immediately reset.
If we cause a preemption event from the heartbeat (even strictly at min
prio we could cause a timeslice to expire) it is useful to have the
debug in dmesg (as in CI we don't get error-state very often).

Yes, I've tried trimming it to only on the vital paths, but so far
haven't found a satisfactory means.

To make me happy I think I need to push it down into the reset routines
themselves. Hmm. Except those we definitely don't want dmesg spam as
they get runs 10s of thousands times during CI.

It'll do for now. I'm sure we'll get tired of it and find it a new home.

> > +static struct kobj_attribute heartbeat_interval_attr =
> > +__ATTR(heartbeat_interval_ms, 0600, heartbeat_interval_show, 
> > heartbeat_interval_store);
> >   
> >   static void kobj_engine_release(struct kobject *kobj)
> >   {
> > @@ -115,6 +141,9 @@ void intel_engines_add_sysfs(struct drm_i915_private 
> > *i915)
> >   _attr.attr,
> >   _attr.attr,
> >   _attr.attr,
> > +#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
> > + _interval_attr.attr,
> > +#endif
> 
> Presumably compiler is happy (or the linker) with only this part getting 
> the #ifdef treatment? (The show/store functions above don't have it.)

Yup, it's not annoying enough to complain about the dead globals. Although
it should be more than smart enough to remove them.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 08/10] drm/i915: Cancel non-persistent contexts on close

2019-10-11 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-11 14:55:00)
> 
> On 10/10/2019 08:14, Chris Wilson wrote:
> > Normally, we rely on our hangcheck to prevent persistent batches from
> > hogging the GPU. However, if the user disables hangcheck, this mechanism
> > breaks down. Despite our insistence that this is unsafe, the users are
> > equally insistent that they want to use endless batches and will disable
> > the hangcheck mechanism. We are looking at perhaps replacing hangcheck
> > with a softer mechanism, that sends a pulse down the engine to check if
> > it is well. We can use the same preemptive pulse to flush an active
> > persistent context off the GPU upon context close, preventing resources
> > being lost and unkillable requests remaining on the GPU after process
> > termination. To avoid changing the ABI and accidentally breaking
> > existing userspace, we make the persistence of a context explicit and
> > enable it by default (matching current ABI). Userspace can opt out of
> > persistent mode (forcing requests to be cancelled when the context is
> > closed by process termination or explicitly) by a context parameter. To
> > facilitate existing use-cases of disabling hangcheck, if the modparam is
> > disabled (i915.enable_hangcheck=0), we disable persistence mode by
> > default.  (Note, one of the outcomes for supporting endless mode will be
> > the removal of hangchecking, at which point opting into persistent mode
> > will be mandatory, or maybe the default perhaps controlled by cgroups.)
> > 
> > v2: Check for hangchecking at context termination, so that we are not
> > left with undying contexts from a crafty user.
> > 
> > Testcase: igt/gem_ctx_persistence
> > Signed-off-by: Chris Wilson 
> > Cc: Joonas Lahtinen 
> > Cc: Michał Winiarski 
> > Cc: Jon Bloomfield 
> > Reviewed-by: Jon Bloomfield 
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 132 ++
> >   drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 ++
> >   .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
> >   .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
> >   include/uapi/drm/i915_drm.h   |  15 ++
> >   5 files changed, 165 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index 5d8221c7ba83..46e5b3b53288 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -70,6 +70,7 @@
> >   #include 
> >   
> >   #include "gt/intel_lrc_reg.h"
> > +#include "gt/intel_engine_heartbeat.h"
> >   #include "gt/intel_engine_user.h"
> >   
> >   #include "i915_gem_context.h"
> > @@ -269,6 +270,78 @@ void i915_gem_context_release(struct kref *ref)
> >   schedule_work(>free_work);
> >   }
> >   
> > +static inline struct i915_gem_engines *
> > +__context_engines_static(struct i915_gem_context *ctx)
> > +{
> > + return rcu_dereference_protected(ctx->engines, true);
> > +}
> > +
> > +static void kill_context(struct i915_gem_context *ctx)
> > +{
> > + intel_engine_mask_t tmp, active, reset;
> > + struct intel_gt *gt = >i915->gt;
> > + struct i915_gem_engines_iter it;
> > + struct intel_engine_cs *engine;
> > + struct intel_context *ce;
> > +
> > + /*
> > +  * If we are already banned, it was due to a guilty request causing
> > +  * a reset and the entire context being evicted from the GPU.
> > +  */
> > + if (i915_gem_context_is_banned(ctx))
> > + return;
> > +
> > + i915_gem_context_set_banned(ctx);
> > +
> > + /*
> > +  * Map the user's engine back to the actual engines; one virtual
> > +  * engine will be mapped to multiple engines, and using ctx->engine[]
> > + the same engine may have multiple instances in the user's map.
> > +  * However, we only care about pending requests, so only include
> > +  * engines on which there are incomplete requests.
> > +  */
> > + active = 0;
> > + for_each_gem_engine(ce, __context_engines_static(ctx), it) {
> > + struct dma_fence *fence;
> > +
> > + if (!ce->timeline)
> > + continue;
> > +
> > + fence = i915_active_fence_get(>timeline->last_request);
> > + if (!fence)
> > + continue;
> > +

Re: [Intel-gfx] [PATCH v2] drm/i915/execlists: Cancel banned contexts on schedule-out

2019-10-11 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-11 14:10:21)
> 
> On 11/10/2019 12:16, Chris Wilson wrote:
> > On schedule-out (CS completion) of a banned context, scrub the context
> > image so that we do not replay the active payload. The intent is that we
> > skip banned payloads on request submission so that the timeline
> > advancement continues on in the background. However, if we are returning
> > to a preempted request, i915_request_skip() is ineffective and instead we
> > need to patch up the context image so that it continues from the start
> > of the next request.
> > 
> > v2: Fixup cancellation so that we only scrub the payload of the active
> > request and do not short-circuit the breadcrumbs (which might cause
> > other contexts to execute out of order).
> > 
> > Signed-off-by: Chris Wilson 
> > Cc: Tvrtko Ursulin 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_lrc.c|  91 ++---
> >   drivers/gpu/drm/i915/gt/selftest_lrc.c | 273 +
> >   2 files changed, 341 insertions(+), 23 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> > b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 09fc5ecfdd09..809a5dd97c14 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
> >const struct intel_engine_cs *engine,
> >const struct intel_ring *ring,
> >bool close);
> > +static void
> > +__execlists_update_reg_state(const struct intel_context *ce,
> > +  const struct intel_engine_cs *engine);
> >   
> >   static void __context_pin_acquire(struct intel_context *ce)
> >   {
> > @@ -256,6 +259,29 @@ static void mark_eio(struct i915_request *rq)
> >   i915_request_mark_complete(rq);
> >   }
> >   
> > +static struct i915_request *active_request(struct i915_request *rq)
> > +{
> > + const struct intel_context * const ce = rq->hw_context;
> > + struct i915_request *active = NULL;
> > + struct list_head *list;
> > +
> > + if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
> > + return rq;
> > +
> > + list = _request_active_timeline(rq)->requests;
> > + list_for_each_entry_from_reverse(rq, list, link) {
> > + if (i915_request_completed(rq))
> > + break;
> > +
> > + if (rq->hw_context != ce)
> > + break;
> 
> Would it be of any value here to also check the initial breadcrumb matches?

Not currently. I don't think it makes any difference whether or not we
are inside the payload on the cancel_active() path as we know we have an
active context. More fun and games for the reset path as we need to minimise
collateral damage.

> > +static void cancel_active(struct i915_request *rq,
> > +   struct intel_engine_cs *engine)
> > +{
> > + struct intel_context * const ce = rq->hw_context;
> > + u32 *regs = ce->lrc_reg_state;
> > +
> > + /*
> > +  * The executing context has been cancelled. Fixup the context so that
> > +  * it continues on from the breadcrumb after the batch and will be
> > +  * marked as incomplete [-EIO] upon signaling. We preserve the
> 
> Where does the -EIO marking happen now?

On the next __i915_request_submit()

> > +  * breadcrumbs and semaphores of the subsequent requests so that
> > +  * inter-timeline dependencies remain correctly ordered.
> > +  */
> > + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
> > +   __func__, engine->name, rq->fence.context, rq->fence.seqno);
> > +
> > + __context_pin_acquire(ce);
> > +
> > + /* On resubmission of the active request, its payload will be scrubbed */
> > + rq = active_request(rq);
> > + if (rq)
> > + ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
> > + else
> > + ce->ring->head = ce->ring->tail;
> 
> I don't quite understand yet.
> 
> If a context was banned I'd expect all requests on the tl->requests to 
> be zapped and we only move to execute the last breadcrumb, no?

We do zap them all, on __i915_request_submit(). What we are preserving
is the dependency chains as we don't want to emit the final breadcrumb
before its dependencies have been signaled. (Otherwise our optimisation
of only waiting for the end of the chain [message truncated in archive]

Re: [Intel-gfx] [PATCH] drm/i915: Honour O_NONBLOCK before throttling execbuf submissions

2019-10-11 Thread Chris Wilson
Quoting Chris Wilson (2019-10-10 14:48:49)
> Check the user's flags on the struct file before deciding whether or not
> to stall before submitting a request. This allows us to reasonably
> cheaply honour O_NONBLOCK without checking at more critical phases
> during request submission.
> 
> Suggested-by: Joonas Lahtinen 
> Signed-off-by: Chris Wilson 
> Cc: Joonas Lahtinen 
> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 21 ---
>  1 file changed, 14 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 98816c35ffc3..bc6bcb8f6d79 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -2189,15 +2189,22 @@ static int __eb_pin_engine(struct i915_execbuffer 
> *eb, struct intel_context *ce)
> intel_context_timeline_unlock(tl);
>  
> if (rq) {
> -   if (i915_request_wait(rq,
> - I915_WAIT_INTERRUPTIBLE,
> - MAX_SCHEDULE_TIMEOUT) < 0) {
> -   i915_request_put(rq);
> -   err = -EINTR;
> -   goto err_exit;
> -   }
> +   bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
> +   long timeout;

The alternative or addendum would be to use an execbuf.flag to opt out
of throttling. O_NONBLOCK seems fitting though.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH i-g-t] Check all sysfs entries are readable without dmesg spam

2019-10-11 Thread Chris Wilson
We already check that debugfs do not cause spam (and they tend to be
more heavyhanded and so more likely to break), but that does not excuse
not checking our sysfs directory!

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 tests/debugfs_test.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/debugfs_test.c b/tests/debugfs_test.c
index f8dd851c9..2d4753df5 100644
--- a/tests/debugfs_test.c
+++ b/tests/debugfs_test.c
@@ -56,7 +56,7 @@ static void read_and_discard_sysfs_entries(int path_fd, int 
indent)
igt_debug("%sEntering subdir %s\n", tabs, 
dirent->d_name);
read_and_discard_sysfs_entries(sub_fd, indent + 1);
close(sub_fd);
-   } else {
+   } else if (dirent->d_type == DT_REG) {
char buf[512];
int sub_fd;
ssize_t ret;
@@ -149,7 +149,7 @@ static void kms_tests(int fd, int debugfs)
 
 igt_main
 {
-   int fd = -1, debugfs;
+   int fd = -1, debugfs, sysfs;
 
igt_skip_on_simulation();
 
@@ -157,10 +157,13 @@ igt_main
fd = drm_open_driver_master(DRIVER_INTEL);
igt_require_gem(fd);
debugfs = igt_debugfs_dir(fd);
+   sysfs = igt_sysfs_open(fd);
 
kmstest_set_vt_graphics_mode();
}
 
+   igt_subtest("sysfs")
+   read_and_discard_sysfs_entries(sysfs, 0);
igt_subtest("read_all_entries")
read_and_discard_sysfs_entries(debugfs, 0);
 
@@ -168,6 +171,7 @@ igt_main
kms_tests(fd, debugfs);
 
igt_fixture {
+   close(sysfs);
close(debugfs);
close(fd);
}
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915: Expose engine properties via sysfs

2019-10-11 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-11 13:16:33)
> 
> On 11/10/2019 12:36, Chris Wilson wrote:
> > Preliminary stub to add engines underneath /sys/class/drm/cardN/, so
> > that we can expose properties on each engine to the sysadmin.
> > 
> > To start with we have basic analogues of the i915_query ioctl so that we
> > can pretty print engine discovery from the shell, and flesh out the
> > directory structure. Later we will add writeable sysadmin properties such
> > as per-engine timeout controls.
> > 
> > An example tree of the engine properties on Braswell:
> >  /sys/class/drm/card0
> >  └── engine
> >      ├── bcs0
> >      │   ├── capabilities
> >      │   ├── class
> >      │   ├── instance
> >      │   └── name
> >      ├── rcs0
> >      │   ├── capabilities
> >      │   ├── class
> >      │   ├── instance
> >      │   └── name
> >      ├── vcs0
> >      │   ├── capabilities
> >      │   ├── class
> >      │   ├── instance
> >      │   └── name
> >      └── vecs0
> >          ├── capabilities
> >      ├── class
> >      ├── instance
> >      └── name
> > 
> > v2: Include stringified capabilities
> > 
> > Signed-off-by: Chris Wilson 
> > Cc: Joonas Lahtinen 
> > Cc: Tvrtko Ursulin 
> > Cc: Daniele Ceraolo Spurio 
> > Cc: Rodrigo Vivi 
> > Acked-by: Rodrigo Vivi 
> > ---
> >   drivers/gpu/drm/i915/Makefile|   3 +-
> >   drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 177 +++
> >   drivers/gpu/drm/i915/gt/intel_engine_sysfs.h |  14 ++
> >   drivers/gpu/drm/i915/i915_sysfs.c|   3 +
> >   4 files changed, 196 insertions(+), 1 deletion(-)
> >   create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> >   create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index e791d9323b51..cd9a10ba2516 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -78,8 +78,9 @@ gt-y += \
> >   gt/intel_breadcrumbs.o \
> >   gt/intel_context.o \
> >   gt/intel_engine_cs.o \
> > - gt/intel_engine_pool.o \
> >   gt/intel_engine_pm.o \
> > + gt/intel_engine_pool.o \
> > + gt/intel_engine_sysfs.o \
> >   gt/intel_engine_user.o \
> >   gt/intel_gt.o \
> >   gt/intel_gt_irq.o \
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c 
> > b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> > new file mode 100644
> > index ..f6e4822f8928
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> > @@ -0,0 +1,177 @@
> > +/*
> > + * SPDX-License-Identifier: MIT
> > + *
> > + * Copyright © 2019 Intel Corporation
> > + */
> > +
> > +#include 
> > +#include 
> > +
> > +#include "i915_drv.h"
> > +#include "intel_engine.h"
> > +#include "intel_engine_sysfs.h"
> > +
> > +struct kobj_engine {
> > + struct kobject base;
> > + struct intel_engine_cs *engine;
> > +};
> > +
> > +static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj)
> > +{
> > + return container_of(kobj, struct kobj_engine, base)->engine;
> > +}
> > +
> > +static ssize_t
> > +name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> > +{
> > + return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name);
> > +}
> > +
> > +static struct kobj_attribute name_attr =
> > +__ATTR(name, 0444, name_show, NULL);
> > +
> > +static ssize_t
> > +class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> > +{
> > + return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class);
> > +}
> > +
> > +static struct kobj_attribute class_attr =
> > +__ATTR(class, 0444, class_show, NULL);
> > +
> > +static ssize_t
> > +inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> > +{
> > + return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance);
> > +}
> > +
> > +static struct kobj_attribute inst_attr =
> > +__ATTR(instance, 0444, inst_show, NULL);
> > +
> > +static ssize_t repr_trim(char *buf, ssize_t len)
> > +{
> > + /* Trim off the trailing space and replace with a newline */

Re: [Intel-gfx] [bug report] drm/i915/selftests: Exercise context switching in parallel

2019-10-11 Thread Chris Wilson
Quoting Dan Carpenter (2019-10-11 12:42:09)
> Hello Chris Wilson,
> 
> This is a semi-automatic email about new static checker warnings.
> 
> The patch 50d16d44cce4: "drm/i915/selftests: Exercise context 
> switching in parallel" from Sep 30, 2019, leads to the following 
> Smatch complaint:
> 
> drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:349 
> live_parallel_switch()
> error: we previously assumed 'data' could be null (see line 263)
> 
> drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>257  }
>258  
>259  engines = i915_gem_context_lock_engines(ctx);
>260  count = engines->num_engines;
>261  
>262  data = kcalloc(count, sizeof(*data), GFP_KERNEL);
>263  if (!data) {
>264  i915_gem_context_unlock_engines(ctx);
>265  err = -ENOMEM;
>266  goto out;
> 
> goto out is always a sign of troubled times ahead...

Has been upgraded to out_file with claims of fewer burnt toast.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915: Expose engine properties via sysfs

2019-10-11 Thread Chris Wilson
Preliminary stub to add engines underneath /sys/class/drm/cardN/, so
that we can expose properties on each engine to the sysadmin.

To start with we have basic analogues of the i915_query ioctl so that we
can pretty print engine discovery from the shell, and flesh out the
directory structure. Later we will add writeable sysadmin properties such
as per-engine timeout controls.

An example tree of the engine properties on Braswell:
/sys/class/drm/card0
└── engine
    ├── bcs0
    │   ├── capabilities
    │   ├── class
    │   ├── instance
    │   └── name
    ├── rcs0
    │   ├── capabilities
    │   ├── class
    │   ├── instance
    │   └── name
    ├── vcs0
    │   ├── capabilities
    │   ├── class
    │   ├── instance
    │   └── name
    └── vecs0
        ├── capabilities
    ├── class
    ├── instance
    └── name

v2: Include stringified capabilities

Signed-off-by: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Tvrtko Ursulin 
Cc: Daniele Ceraolo Spurio 
Cc: Rodrigo Vivi 
Acked-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/Makefile|   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 177 +++
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h |  14 ++
 drivers/gpu/drm/i915/i915_sysfs.c|   3 +
 4 files changed, 196 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e791d9323b51..cd9a10ba2516 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -78,8 +78,9 @@ gt-y += \
gt/intel_breadcrumbs.o \
gt/intel_context.o \
gt/intel_engine_cs.o \
-   gt/intel_engine_pool.o \
gt/intel_engine_pm.o \
+   gt/intel_engine_pool.o \
+   gt/intel_engine_sysfs.o \
gt/intel_engine_user.o \
gt/intel_gt.o \
gt/intel_gt_irq.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
new file mode 100644
index ..f6e4822f8928
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -0,0 +1,177 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include 
+#include 
+
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_engine_sysfs.h"
+
+struct kobj_engine {
+   struct kobject base;
+   struct intel_engine_cs *engine;
+};
+
+static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj)
+{
+   return container_of(kobj, struct kobj_engine, base)->engine;
+}
+
+static ssize_t
+name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name);
+}
+
+static struct kobj_attribute name_attr =
+__ATTR(name, 0444, name_show, NULL);
+
+static ssize_t
+class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class);
+}
+
+static struct kobj_attribute class_attr =
+__ATTR(class, 0444, class_show, NULL);
+
+static ssize_t
+inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance);
+}
+
+static struct kobj_attribute inst_attr =
+__ATTR(instance, 0444, inst_show, NULL);
+
+static ssize_t repr_trim(char *buf, ssize_t len)
+{
+   /* Trim off the trailing space and replace with a newline */
+   if (len > PAGE_SIZE)
+   len = PAGE_SIZE;
+   if (len > 0)
+   buf[len - 1] = '\n';
+
+   return len;
+}
+
+static ssize_t
+caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   static const char *vcs_repr[] = {
+  [ilog2(I915_VIDEO_CLASS_CAPABILITY_HEVC)] = "hevc",
+  [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc",
+   };
+   static const char *vecs_repr[] = {
+  [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc",
+   };
+   struct intel_engine_cs *engine = kobj_to_engine(kobj);
+   const char **repr;
+   int num_repr, n;
+   ssize_t len;
+
+   switch (engine->class) {
+   case VIDEO_DECODE_CLASS:
+   repr = vcs_repr;
+   num_repr = ARRAY_SIZE(vcs_repr);
+   break;
+
+   case VIDEO_ENHANCEMENT_CLASS:
+   repr = vecs_repr;
+   num_repr = ARRAY_SIZE(vecs_repr);
+   break;
+
+   default:
+   repr = NULL;
+   num_repr = 0;
+   break;
+   }
+
+   len = 0;
+   for_each_set_bit(n,
+(unsigned long *)>uabi_capabilities,
+

[Intel-gfx] [PATCH v2] drm/i915/execlists: Cancel banned contexts on schedule-out

2019-10-11 Thread Chris Wilson
On schedule-out (CS completion) of a banned context, scrub the context
image so that we do not replay the active payload. The intent is that we
skip banned payloads on request submission so that the timeline
advancement continues on in the background. However, if we are returning
to a preempted request, i915_request_skip() is ineffective and instead we
need to patch up the context image so that it continues from the start
of the next request.

v2: Fixup cancellation so that we only scrub the payload of the active
request and do not short-circuit the breadcrumbs (which might cause
other contexts to execute out of order).

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c|  91 ++---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 273 +
 2 files changed, 341 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 09fc5ecfdd09..809a5dd97c14 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
 const struct intel_engine_cs *engine,
 const struct intel_ring *ring,
 bool close);
+static void
+__execlists_update_reg_state(const struct intel_context *ce,
+const struct intel_engine_cs *engine);
 
 static void __context_pin_acquire(struct intel_context *ce)
 {
@@ -256,6 +259,29 @@ static void mark_eio(struct i915_request *rq)
i915_request_mark_complete(rq);
 }
 
+static struct i915_request *active_request(struct i915_request *rq)
+{
+   const struct intel_context * const ce = rq->hw_context;
+   struct i915_request *active = NULL;
+   struct list_head *list;
+
+   if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
+   return rq;
+
+   list = _request_active_timeline(rq)->requests;
+   list_for_each_entry_from_reverse(rq, list, link) {
+   if (i915_request_completed(rq))
+   break;
+
+   if (rq->hw_context != ce)
+   break;
+
+   active = rq;
+   }
+
+   return active;
+}
+
 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
 {
return (i915_ggtt_offset(engine->status_page.vma) +
@@ -977,6 +1003,45 @@ static void kick_siblings(struct i915_request *rq, struct 
intel_context *ce)
tasklet_schedule(>base.execlists.tasklet);
 }
 
+static void cancel_active(struct i915_request *rq,
+ struct intel_engine_cs *engine)
+{
+   struct intel_context * const ce = rq->hw_context;
+   u32 *regs = ce->lrc_reg_state;
+
+   /*
+* The executing context has been cancelled. Fixup the context so that
+* it continues on from the breadcrumb after the batch and will be
+* marked as incomplete [-EIO] upon signaling. We preserve the
+* breadcrumbs and semaphores of the subsequent requests so that
+* inter-timeline dependencies remain correctly ordered.
+*/
+   GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
+ __func__, engine->name, rq->fence.context, rq->fence.seqno);
+
+   __context_pin_acquire(ce);
+
+   /* On resubmission of the active request, its payload will be scrubbed */
+   rq = active_request(rq);
+   if (rq)
+   ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
+   else
+   ce->ring->head = ce->ring->tail;
+
+   /* Scrub the context image to prevent replaying the previous batch */
+   memcpy(regs, /* skip restoring the vanilla PPHWSP */
+  engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+  engine->context_size - PAGE_SIZE);
+
+   execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+   __execlists_update_reg_state(ce, engine);
+
+   /* We've switched away, so this should be a no-op, but intent matters */
+   ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+
+   __context_pin_release(ce);
+}
+
 static inline void
 __execlists_schedule_out(struct i915_request *rq,
 struct intel_engine_cs * const engine)
@@ -987,6 +1052,9 @@ __execlists_schedule_out(struct i915_request *rq,
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
intel_gt_pm_put(engine->gt);
 
+   if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+   cancel_active(rq, engine);
+
/*
 * If this is part of a virtual engine, its next request may
 * have been blocked waiting for access to the active context.
@@ -2776,29 +2844,6 @@ static void reset_csb_pointers(struct intel_engine_cs 
*engine)
   >csb_statu

Re: [Intel-gfx] [PATCH 07/10] drm/i915/execlists: Cancel banned contexts on schedule-out

2019-10-11 Thread Chris Wilson
Quoting Chris Wilson (2019-10-11 11:15:58)
> Quoting Tvrtko Ursulin (2019-10-11 10:47:26)
> > > + if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
> > > + cancel_active(rq, engine);
> > 
> > Or you are counting this is already the last runnable request from this 
> > context due coalescing? It wouldn't work if for any reason coalescing 
> > would be prevented. Either with GVT, or I had some ideas to prevent 
> > coalescing for contexts where watchdog is enabled in the future. In 
> > which case this would be a hidden gotcha. Maybe all that's needed in 
> > mark_complete is also to look towards the end of the list?
> 
> I'm not following. We are looking at the context here, which is track by
> the last request submitted for that context.

Oh I see, you were pointing out that I had not walked back along the context
to find the incomplete request for correct patching.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/execlists: Only mark incomplete requests as -EIO on cancelling

2019-10-11 Thread Chris Wilson
Only the requests that have not completed do we want to change the
status of to signal the -EIO when cancelling the inflight set of requests
upon wedging.

Reported-by: Tvrtko Ursulin 
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b00499cc7586..1f4772329021 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -247,8 +247,12 @@ static void __context_pin_release(struct intel_context *ce)
 
 static void mark_eio(struct i915_request *rq)
 {
-   if (!i915_request_signaled(rq))
-   dma_fence_set_error(>fence, -EIO);
+   if (i915_request_completed(rq))
+   return;
+
+   GEM_BUG_ON(i915_request_signaled(rq));
+
+   dma_fence_set_error(>fence, -EIO);
i915_request_mark_complete(rq);
 }
 
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 07/10] drm/i915/execlists: Cancel banned contexts on schedule-out

2019-10-11 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-11 10:47:26)
> > +static void
> > +mark_complete(struct i915_request *rq, struct intel_engine_cs *engine)
> > +{
> > + const struct intel_timeline * const tl = 
> > rcu_dereference(rq->timeline);
> > +
> > + *(u32 *)tl->hwsp_seqno = rq->fence.seqno;
> > + GEM_BUG_ON(!i915_request_completed(rq));
> > +
> > + list_for_each_entry_from_reverse(rq, >requests, link) {
> > + if (i915_request_signaled(rq))
> > + break;
> > +
> > + mark_eio(rq);
> 
> This would -EIO requests which have potentially be completed but not 
> retired yet? If so why?

Hmm. That's a bit of an oversight, yes.

> > + }
> > +
> > + intel_engine_queue_breadcrumbs(engine);
> > +}
> > +
> > +static void cancel_active(struct i915_request *rq,
> > +   struct intel_engine_cs *engine)
> > +{
> > + struct intel_context * const ce = rq->hw_context;
> > + u32 *regs = ce->lrc_reg_state;
> > +
> > + if (i915_request_completed(rq))
> > + return;
> > +
> > + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
> > +   __func__, engine->name, rq->fence.context, rq->fence.seqno);
> > + __context_pin_acquire(ce);
> > +
> > + /* Scrub the context image to prevent replaying the previous batch */
> > + memcpy(regs, /* skip restoring the vanilla PPHWSP */
> > +engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> > +engine->context_size - PAGE_SIZE);
> 
> context_size - LRC_STATE_PN * PAGE_SIZE ?

context_size excludes the guc header pages, so it's a bit of a kerfuffle.
 
> > + execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> > +
> > + /* Ring will be advanced on retire; here we need to reset the context 
> > */
> > + ce->ring->head = intel_ring_wrap(ce->ring, rq->wa_tail);
> > + __execlists_update_reg_state(ce, engine);
> > +
> > + /* We've switched away, so this should be a no-op, but intent matters 
> > */
> > + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
> > +
> > + /* Let everyone know that the request may now be retired */
> > + rcu_read_lock();
> > + mark_complete(rq, engine);
> > + rcu_read_unlock();
> > +
> > + __context_pin_release(ce);
> > +}
> > +
> >   static inline void
> >   __execlists_schedule_out(struct i915_request *rq,
> >struct intel_engine_cs * const engine)
> > @@ -1032,6 +1087,9 @@ __execlists_schedule_out(struct i915_request *rq,
> >   execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> >   intel_gt_pm_put(engine->gt);
> >   
> > + if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
> > + cancel_active(rq, engine);
> 
> Or you are counting this is already the last runnable request from this 
> context due coalescing? It wouldn't work if for any reason coalescing 
> would be prevented. Either with GVT, or I had some ideas to prevent 
> coalescing for contexts where watchdog is enabled in the future. In 
> which case this would be a hidden gotcha. Maybe all that's needed in 
> mark_complete is also to look towards the end of the list?

I'm not following. We are looking at the context here, which is tracked by
the last request submitted for that context.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 07/10] drm/i915/execlists: Cancel banned contexts on schedule-out

2019-10-11 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-11 10:47:26)
> 
> On 10/10/2019 08:14, Chris Wilson wrote:
> > On completion of a banned context, scrub the context image so that we do
> 
> s/completion/schedule out/ like in the subject line? Otherwise I 
> struggle to understand how banned context is completing. Presumably it 
> was banned because it keeps hanging.

Ok, I had the CS completion event in mind, but i915_request_completed()
does muddle the waters.
 
> > not replay the active payload. The intent is that we skip banned
> > payloads on request submission so that the timeline advancement
> > continues on in the background. However, if we are returning to a
> > preempted request, i915_request_skip() is ineffective and instead we
> > need to patch up the context image so that it continues from the start
> > of the next request.
> 
> But if the context is banned why do we want to continue from the start 
> of the next request? Don't we want to zap all submitted so far?

We scrub the payload, but the request itself is still a vital part of
the web of dependencies. That is we still execute the semaphores and
breadcrumbs of the cancelled requests to maintain global ordering.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 06/10] drm/i915/gt: Introduce barrier pulses along engines

2019-10-11 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-11 10:11:58)
> 
> On 10/10/2019 08:14, Chris Wilson wrote:
> > +#include "intel_context.h"
> > +#include "intel_engine_heartbeat.h"
> > +#include "intel_engine_pm.h"
> > +#include "intel_engine.h"
> > +#include "intel_gt.h"
> > +
> > +static void idle_pulse(struct intel_engine_cs *engine, struct i915_request 
> > *rq)
> > +{
> > + engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
> > + i915_request_add_active_barriers(rq);
> 
> Why do you need active barriers with the idle pulse? Just because it is 
> a handy point to release the previously pinned contexts? But they may 
> get reused as soon as idle pulse finishes, no?

Yes. It is a known point in time where the other context has finished,
and when this request runs has completed a context switch.

Remember all that time we were arguing about idle barriers and how we
needed to run them periodically to allow them to be reaped and avoid
having the entire aperture pinned with stale contexts forcing a stall.
And avoiding making the idle barriers themselves a global serialising
barrier. :|

The idea we had was that we would take advantage of any guaranteed
context switches and send regular pulses from the kernel context to pick
up stragglers. So we could use any context switch after we retire
the old context to unpin it, but to keep the locking and preallocations
of the rbtree simple (you've seen i915_active, simple is anything but),
I left it to the engine->kernel_context to manage.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH v2] drm/i915: Expose engine properties via sysfs

2019-10-11 Thread Chris Wilson
Preliminary stub to add engines underneath /sys/class/drm/cardN/, so
that we can expose properties on each engine to the sysadmin.

To start with we have basic analogues of the i915_query ioctl so that we
can pretty print engine discovery from the shell, and flesh out the
directory structure. Later we will add writeable sysadmin properties such
as per-engine timeout controls.

An example tree of the engine properties on Braswell:
/sys/class/drm/card0
└── engine
    ├── bcs0
    │   ├── capabilities
    │   ├── class
    │   ├── instance
    │   └── name
    ├── rcs0
    │   ├── capabilities
    │   ├── class
    │   ├── instance
    │   └── name
    ├── vcs0
    │   ├── capabilities
    │   ├── class
    │   ├── instance
    │   └── name
    └── vecs0
        ├── capabilities
    ├── class
    ├── instance
    └── name

v2: Include stringified capabilities

Signed-off-by: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Tvrtko Ursulin 
Cc: Daniele Ceraolo Spurio 
Cc: Rodrigo Vivi 
Acked-by: Rodrigo Vivi 
---
Tvrtko, you mentioned exposing flags as well, I haven't spotted what
should be included for that field.
---
 drivers/gpu/drm/i915/Makefile|   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 175 +++
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h |  14 ++
 drivers/gpu/drm/i915/i915_sysfs.c|   3 +
 4 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e791d9323b51..cd9a10ba2516 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -78,8 +78,9 @@ gt-y += \
gt/intel_breadcrumbs.o \
gt/intel_context.o \
gt/intel_engine_cs.o \
-   gt/intel_engine_pool.o \
gt/intel_engine_pm.o \
+   gt/intel_engine_pool.o \
+   gt/intel_engine_sysfs.o \
gt/intel_engine_user.o \
gt/intel_gt.o \
gt/intel_gt_irq.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
new file mode 100644
index ..bfc3a4f631a5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -0,0 +1,175 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include 
+#include 
+
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_engine_sysfs.h"
+
+struct kobj_engine {
+   struct kobject base;
+   struct intel_engine_cs *engine;
+};
+
+static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj)
+{
+   return container_of(kobj, struct kobj_engine, base)->engine;
+}
+
+static ssize_t
+name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name);
+}
+
+static struct kobj_attribute name_attr =
+__ATTR(name, 0444, name_show, NULL);
+
+static ssize_t
+class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class);
+}
+
+static struct kobj_attribute class_attr =
+__ATTR(class, 0444, class_show, NULL);
+
+static ssize_t
+inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance);
+}
+
+static struct kobj_attribute inst_attr =
+__ATTR(instance, 0444, inst_show, NULL);
+
+static ssize_t repr_trim(char *buf, ssize_t len)
+{
+   /* Trim off the trailing space */
+   if (len > PAGE_SIZE)
+   len = PAGE_SIZE;
+   if (len > 0)
+   buf[--len] = '\0';
+
+   return len;
+}
+
+static ssize_t
+caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   static const char *vcs_repr[] = {
+  [ilog2(I915_VIDEO_CLASS_CAPABILITY_HEVC)] = "hevc",
+  [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc",
+   };
+   static const char *vecs_repr[] = {
+  [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc",
+   };
+   struct intel_engine_cs *engine = kobj_to_engine(kobj);
+   const char **repr;
+   int num_repr, n;
+   ssize_t len;
+
+   switch (engine->class) {
+   case VIDEO_DECODE_CLASS:
+   repr = vcs_repr;
+   num_repr = ARRAY_SIZE(vcs_repr);
+   break;
+
+   case VIDEO_ENHANCEMENT_CLASS:
+   repr = vecs_repr;
+   num_repr = ARRAY_SIZE(vecs_repr);
+   break;
+
+   default:
+   repr = NULL;
+   num_repr = 0;
+   break;
+   }
+
+   len = 0;
+   for_each_set_bit(n, (uns

Re: [Intel-gfx] [PATCH 03/10] drm/i915: Expose engine properties via sysfs

2019-10-11 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-11 09:44:16)
> 
> On 10/10/2019 08:14, Chris Wilson wrote:
> > Preliminary stub to add engines underneath /sys/class/drm/cardN/, so
> > that we can expose properties on each engine to the sysadmin.
> > 
> > To start with we have basic analogues of the i915_query ioctl so that we
> > can pretty print engine discovery from the shell, and flesh out the
> > directory structure. Later we will add writeable sysadmin properties such
> > as per-engine timeout controls.
> > 
> > An example tree of the engine properties on Braswell:
> >  /sys/class/drm/card0
> >  └── engine
> >      ├── bcs0
> >      │   ├── class
> >      │   ├── heartbeat_interval_ms
> 
> Not present in this patch.

I did say an example tree, not this tree :)

> >      │   ├── instance
> >      │   ├── mmio_base
> 
> I vote for putting mmio_base in a followup patch.

Darn your eagle eyes ;)

> 
> And how about we add capabilities in the first patch? So we get another 
> way of engine discovery. Ideally with mapping of bits to user friendly 
> strings.

Right, I was about to ask if we should do a /proc/cpuinfo style
capabilities. Do we need both? Or just stick to the more human readable
output for sysfs?
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915: Honour O_NONBLOCK before throttling execbuf submissions

2019-10-11 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-11 09:20:12)
> 
> On 10/10/2019 14:58, Chris Wilson wrote:
> > Quoting Chris Wilson (2019-10-10 14:48:49)
> >> Check the user's flags on the struct file before deciding whether or not
> >> to stall before submitting a request. This allows us to reasonably
> >> cheaply honour O_NONBLOCK without checking at more critical phases
> >> during request submission.
> > 
> > One might reasonably expect poll(POLLOUT) to be supported as well in
> > this case :|
> 
> That doesn't kind of fit - mismatch between one fd and multiple 
> contexts, no? Or you could signal POLLOUT on any, or on all have space. 
> But that's taking it too far. :)

Aye, that's what I was thinking of with the ugpu comment, where one fd is
one user submit queue.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH i-g-t] i915: Use O_NONBLOCK for faster ringsize probing

2019-10-11 Thread Chris Wilson
When the kernel supports O_NONBLOCK reporting of a full execbuf queue,
take advantage of that to immediately report when the output would block
due to the ring being full.

Signed-off-by: Chris Wilson 
---
 lib/i915/gem_ring.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/lib/i915/gem_ring.c b/lib/i915/gem_ring.c
index 9f099edff..5ca2a728b 100644
--- a/lib/i915/gem_ring.c
+++ b/lib/i915/gem_ring.c
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -89,11 +90,16 @@ __gem_measure_ring_inflight(int fd, unsigned int engine, 
enum measure_ring_flags
 
count = 0;
do {
-   if (__execbuf(fd, ) == 0) {
+   int err = __execbuf(fd, );
+
+   if (err == 0) {
count++;
continue;
}
 
+   if (err == -EWOULDBLOCK)
+   break;
+
if (last[1] == count)
break;
 
@@ -102,8 +108,6 @@ __gem_measure_ring_inflight(int fd, unsigned int engine, 
enum measure_ring_flags
last[1] = last[0];
last[0] = count;
} while (1);
-
-   igt_assert_eq(__execbuf(fd, ), -EINTR);
igt_assert(count > 2);
 
memset(, 0, sizeof(itv));
@@ -145,6 +149,9 @@ gem_measure_ring_inflight(int fd, unsigned int engine, enum 
measure_ring_flags f
 
fd = gem_reopen_driver(fd);
 
+   /* When available, disable execbuf throttling */
+   fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | O_NONBLOCK);
+
if (engine == ALL_ENGINES) {
for_each_physical_engine(fd, engine) {
unsigned int count =
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/perf: Allow dynamic reconfiguration of the OA stream

2019-10-10 Thread Chris Wilson
Introduce a new perf_ioctl command to change the OA configuration of the
active stream. This allows the OA stream to be reconfigured between
batch buffers, giving greater flexibility in sampling. We inject a
request into the OA context to reconfigure the stream asynchronously on
the GPU in between and ordered with execbuffer calls.

Original patch for dynamic reconfiguration by Lionel Landwerlin.

Signed-off-by: Chris Wilson 
Reviewed-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c | 49 +++-
 include/uapi/drm/i915_drm.h  |  9 ++
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c2431b5a1f55..5daaf8d0bdc3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2860,6 +2860,43 @@ static void i915_perf_disable_locked(struct 
i915_perf_stream *stream)
stream->ops->disable(stream);
 }
 
+static long i915_perf_config_locked(struct i915_perf_stream *stream,
+   unsigned long metrics_set)
+{
+   struct i915_oa_config *config;
+   long ret = stream->oa_config->id;
+
+   config = i915_perf_get_oa_config(stream->perf, metrics_set);
+   if (!config)
+   return -EINVAL;
+
+   if (config != stream->oa_config) {
+   struct intel_context *ce;
+   int err;
+
+   /*
+* If OA is bound to a specific context, emit the
+* reconfiguration inline from that context. The update
+* will then be ordered with respect to submission on that
+* context.
+*
+* When set globally, we use a low priority kernel context,
+* so it will effectively take effect when idle.
+*/
+   ce = stream->pinned_ctx ?: stream->engine->kernel_context;
+
+   err = emit_oa_config(stream, ce);
+   if (err == 0)
+   config = xchg(>oa_config, config);
+   else
+   ret = err;
+   }
+
+   i915_oa_config_put(config);
+
+   return ret;
+}
+
 /**
  * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
  * @stream: An i915 perf stream
@@ -2883,6 +2920,8 @@ static long i915_perf_ioctl_locked(struct 
i915_perf_stream *stream,
case I915_PERF_IOCTL_DISABLE:
i915_perf_disable_locked(stream);
return 0;
+   case I915_PERF_IOCTL_CONFIG:
+   return i915_perf_config_locked(stream, arg);
}
 
return -EINVAL;
@@ -4020,7 +4059,15 @@ void i915_perf_fini(struct drm_i915_private *i915)
  */
 int i915_perf_ioctl_version(void)
 {
-   return 1;
+   /*
+* 1: Initial version
+*   I915_PERF_IOCTL_ENABLE
+*   I915_PERF_IOCTL_DISABLE
+*
+* 2: Added runtime modification of OA config.
+*   I915_PERF_IOCTL_CONFIG
+*/
+   return 2;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0c7b2815fbf1..0a44438c8fbb 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1932,6 +1932,15 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_DISABLE_IO('i', 0x1)
 
+/**
+ * Change metrics_set captured by a stream.
+ *
+ * Returns the previously bound metrics set id, or a negative error code.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2)
+
 /**
  * Common to all i915 perf records
  */
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915: Don't disable interrupts independently of the lock

2019-10-10 Thread Chris Wilson
Quoting Sebastian Andrzej Siewior (2019-10-10 19:26:10)
> On 2019-10-10 19:11:27 [+0100], Chris Wilson wrote:
> > > --- a/drivers/gpu/drm/i915/i915_request.c
> > > +++ b/drivers/gpu/drm/i915/i915_request.c
> > > @@ -251,15 +251,13 @@ static bool i915_request_retire(struct i
> > > active->retire(active, rq);
> > > }
> > >  
> > > -   local_irq_disable();
> > > -
> > > /*
> > >  * We only loosely track inflight requests across preemption,
> > >  * and so we may find ourselves attempting to retire a _completed_
> > >  * request that we have removed from the HW and put back on a run
> > >  * queue.
> > >  */
> > > -   spin_lock(>engine->active.lock);
> > > +   spin_lock_irq(>engine->active.lock);
> > > list_del(>sched.link);
> > > spin_unlock(>engine->active.lock);
> > >  
> > > @@ -278,9 +276,7 @@ static bool i915_request_retire(struct i
> > > __notify_execute_cb(rq);
> > > }
> > > GEM_BUG_ON(!list_empty(>execute_cb));
> > > -   spin_unlock(>lock);
> > > -
> > > -   local_irq_enable();
> > > +   spin_unlock_irq(>lock);
> > 
> > Nothing screams about the imbalance? irq off from one lock to the other?
> 
> There is no imbalance, is there? Interrupts are disabled as part of
> acquiring the first lock and enabled again as part of releasing the
> second lock.
> It may not look beautiful. 

Sure, it's at the same scope, I just expect at some point lockdep to
complain :)
 
> I'm just not sure if this
> 
> | spin_lock_irq(>engine->active.lock);
> | list_del(>sched.link);
> | spin_unlock_irq(>engine->active.lock);
> | 
> | spin_lock_irq(>lock);
> | i915_request_mark_complete(rq);
> …
> | spin_unlock_irq(>lock);
> 
> has been avoided because an interrupt here could change something or if
> this is just an optimisation.

Just avoiding the back-to-back enable/disable.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/perf: implement active wait for noa configurations

2019-10-10 Thread Chris Wilson
From: Lionel Landwerlin 

NOA configuration take some amount of time to apply. That amount of
time depends on the size of the GT. There is no documented time for
this. For example, past experimentations with powergating
configuration changes seem to indicate a 60~70us delay. We go with
500us as default for now which should be over the required amount of
time (according to HW architects).

v2: Don't forget to save/restore registers used for the wait (Chris)

v3: Name used CS_GPR registers (Chris)
Fix compile issue due to rebase (Lionel)

v4: Fix save/restore helpers (Umesh)

v5: Move noa_wait from drm_i915_private to i915_perf_stream (Lionel)

v6: Add missing struct declarations in i915_perf.h

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson  (v4)
Signed-off-by: Chris Wilson  (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |   5 +
 drivers/gpu/drm/i915/i915_debugfs.c   |  32 +++
 drivers/gpu/drm/i915/i915_perf.c  | 224 ++
 drivers/gpu/drm/i915/i915_perf_types.h|   8 +
 drivers/gpu/drm/i915/i915_reg.h   |   4 +-
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_perf.c| 216 +
 8 files changed, 492 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_perf.c

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 0987100c786b..8e63cffcabe0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -163,7 +163,8 @@
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
-#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define   MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/
 
 /*
  * 3D instructions used by the kernel
@@ -224,6 +225,7 @@
 #define   PIPE_CONTROL_CS_STALL(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE  (1<<18)
 #define   PIPE_CONTROL_MEDIA_STATE_CLEAR   (1<<16)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
 #define   PIPE_CONTROL_QW_WRITE(1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK(3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL (1<<13)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 802f516a3430..be4b263621c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -109,6 +109,11 @@ enum intel_gt_scratch_field {
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
 
+   /* 6 * 8 bytes */
+   INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+   /* 4 bytes */
+   INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 277f31297f29..d463a28b7475 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3590,6 +3590,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
i915_wedged_get, i915_wedged_set,
"%llu\n");
 
+static int
+i915_perf_noa_delay_set(void *data, u64 val)
+{
+   struct drm_i915_private *i915 = data;
+   const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+
+   /*
+* This would lead to infinite waits as we're doing timestamp
+* difference on the CS with only 32bits.
+*/
+   if (val > mul_u32_u32(U32_MAX, clk))
+   return -EINVAL;
+
+   atomic64_set(>perf.noa_programming_delay, val);
+   return 0;
+}
+
+static int
+i915_perf_noa_delay_get(void *data, u64 *val)
+{
+   struct drm_i915_private *i915 = data;
+
+   *val = atomic64_read(>perf.noa_programming_delay);
+   return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
+   i915_perf_noa_delay_get,
+   i915_perf_noa_delay_set,
+   "%llu\n");
+
 #define DROP_UNBOUND   BIT(0)
 #define DROP_BOUND BIT(1)
 #define DROP_RETIREBIT(2)
@@ -4340,6 +4371,7 @@ static const struct i915_debugfs_files {
const char *name;
const struct file_operations *fops;
 } i915_debugfs_files[] = {
+   {"i915_perf_noa_delay", _perf_noa_delay_fops},
{"i915_wedged", _wedged_fops},
{"i915_cache_sharing", _cache_sharing_fops},
{"i915_gem_drop_caches", _drop_caches_fops},
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/driver

[Intel-gfx] [PATCH] drm/i915/perf: Allow dynamic reconfiguration of the OA stream

2019-10-10 Thread Chris Wilson
Introduce a new perf_ioctl command to change the OA configuration of the
active stream. This allows the OA stream to be reconfigured between
batch buffers, giving greater flexibility in sampling. We inject a
request into the OA context to reconfigure the stream asynchronously on
the GPU in between and ordered with execbuffer calls.

Original patch for dynamic reconfiguration by Lionel Landwerlin.

Signed-off-by: Chris Wilson 
Reviewed-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c | 46 +++-
 include/uapi/drm/i915_drm.h  | 10 +++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c37fe275cf33..0459e66eba36 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2860,6 +2860,40 @@ static void i915_perf_disable_locked(struct 
i915_perf_stream *stream)
stream->ops->disable(stream);
 }
 
+static long i915_perf_config_locked(struct i915_perf_stream *stream,
+   unsigned long metrics_set)
+{
+   struct i915_oa_config *config;
+   int err = 0;
+
+   config = i915_perf_get_oa_config(stream->perf, metrics_set);
+   if (!config)
+   return -EINVAL;
+
+   if (config != stream->oa_config) {
+   struct intel_context *ce;
+
+   /*
+* If OA is bound to a specific context, emit the
+* reconfiguration inline from that context. The update
+* will then be ordered with respect to submission on that
+* context.
+*
+* When set globally, we use a low priority kernel context,
+* so it will effectively take effect when idle.
+*/
+   ce = stream->pinned_ctx ?: stream->engine->kernel_context;
+
+   err = emit_oa_config(stream, ce);
+   if (err == 0)
+   config = xchg(>oa_config, config);
+   }
+
+   i915_oa_config_put(config);
+
+   return err;
+}
+
 /**
  * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
  * @stream: An i915 perf stream
@@ -2883,6 +2917,8 @@ static long i915_perf_ioctl_locked(struct 
i915_perf_stream *stream,
case I915_PERF_IOCTL_DISABLE:
i915_perf_disable_locked(stream);
return 0;
+   case I915_PERF_IOCTL_CONFIG:
+   return i915_perf_config_locked(stream, arg);
}
 
return -EINVAL;
@@ -4020,7 +4056,15 @@ void i915_perf_fini(struct drm_i915_private *i915)
  */
 int i915_perf_ioctl_version(void)
 {
-   return 1;
+   /*
+* 1: Initial version
+*   I915_PERF_IOCTL_ENABLE
+*   I915_PERF_IOCTL_DISABLE
+*
+* 2: Added runtime modification of OA config.
+*   I915_PERF_IOCTL_CONFIG
+*/
+   return 2;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0c7b2815fbf1..5e66f7c60261 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1932,6 +1932,16 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_DISABLE_IO('i', 0x1)
 
+/**
+ * Change metrics_set captured by a stream.
+ *
+ * Will not take effect until the stream is restart, or upon the next
+ * execbuf when attached to a specific context.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2)
+
 /**
  * Common to all i915 perf records
  */
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 7/9] drm/i915/perf: Allow dynamic reconfiguration of the OA stream

2019-10-10 Thread Chris Wilson
Quoting Lionel Landwerlin (2019-10-10 17:07:11)
> Yeah, that's a fine interface actually.

One last thought for the interface, should we return the previous
config-id?

That limits the config-id space to long (min s32) so that we can report
the negative error code (or exclude the top 4095 values).
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [CI 7/9] drm/i915/perf: Allow dynamic reconfiguration of the OA stream

2019-10-10 Thread Chris Wilson
Introduce a new perf_ioctl command to change the OA configuration of the
active stream. This allows the OA stream to be reconfigured between
batch buffers, giving greater flexibility in sampling. We inject a
request into the OA context to reconfigure the stream asynchronously on
the GPU in between and ordered with execbuffer calls.

Original patch for dynamic reconfiguration by Lionel Landwerlin.

Signed-off-by: Chris Wilson 
Reviewed-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c | 34 +++-
 include/uapi/drm/i915_drm.h  | 10 ++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c37fe275cf33..001fb249aaec 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2860,6 +2860,28 @@ static void i915_perf_disable_locked(struct 
i915_perf_stream *stream)
stream->ops->disable(stream);
 }
 
+static int i915_perf_config_locked(struct i915_perf_stream *stream,
+  unsigned long metrics_set)
+{
+   struct i915_oa_config *config;
+   int err = 0;
+
+   config = i915_perf_get_oa_config(stream->perf, metrics_set);
+   if (!config)
+   return -EINVAL;
+
+   if (config != stream->oa_config) {
+   if (stream->pinned_ctx)
+   err = emit_oa_config(stream, stream->pinned_ctx);
+   if (err == 0)
+   config = xchg(>oa_config, config);
+   }
+
+   i915_oa_config_put(config);
+
+   return err;
+}
+
 /**
  * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
  * @stream: An i915 perf stream
@@ -2883,6 +2905,8 @@ static long i915_perf_ioctl_locked(struct 
i915_perf_stream *stream,
case I915_PERF_IOCTL_DISABLE:
i915_perf_disable_locked(stream);
return 0;
+   case I915_PERF_IOCTL_CONFIG:
+   return i915_perf_config_locked(stream, arg);
}
 
return -EINVAL;
@@ -4020,7 +4044,15 @@ void i915_perf_fini(struct drm_i915_private *i915)
  */
 int i915_perf_ioctl_version(void)
 {
-   return 1;
+   /*
+* 1: Initial version
+*   I915_PERF_IOCTL_ENABLE
+*   I915_PERF_IOCTL_DISABLE
+*
+* 2: Added runtime modification of OA config.
+*   I915_PERF_IOCTL_CONFIG
+*/
+   return 2;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0c7b2815fbf1..5e66f7c60261 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1932,6 +1932,16 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_DISABLE_IO('i', 0x1)
 
+/**
+ * Change metrics_set captured by a stream.
+ *
+ * Will not take effect until the stream is restarted, or upon the next
+ * execbuf when attached to a specific context.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2)
+
 /**
  * Common to all i915 perf records
  */
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm

2019-10-10 Thread Chris Wilson
As we now have a specific engine to use OA on, exchange the top-level
runtime-pm wakeref with the engine-pm. This still results in the same
top-level runtime-pm, but with more nuances to keep the engine and its
gt awake.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_perf.c   | 8 
 drivers/gpu/drm/i915/i915_perf_types.h | 6 --
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 77c3cef64548..c4a436dfb7db 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -196,7 +196,7 @@
 #include 
 
 #include "gem/i915_gem_context.h"
-#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_lrc_reg.h"
 
@@ -1353,7 +1353,7 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
free_oa_buffer(stream);
 
intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
-   intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref);
+   intel_engine_pm_put(stream->engine);
 
if (stream->ctx)
oa_put_render_ctx_id(stream);
@@ -2218,7 +2218,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 *   In our case we are expecting that taking pm + FORCEWAKE
 *   references will effectively disable RC6.
 */
-   stream->wakeref = intel_runtime_pm_get(stream->uncore->rpm);
+   intel_engine_pm_get(stream->engine);
intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
 
ret = alloc_oa_buffer(stream);
@@ -2252,7 +2252,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
put_oa_config(stream->oa_config);
 
intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
-   intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref);
+   intel_engine_pm_put(stream->engine);
 
 err_config:
if (stream->ctx)
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h 
b/drivers/gpu/drm/i915/i915_perf_types.h
index a91ae2d1a543..eb8d1ebd5095 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -134,12 +134,6 @@ struct i915_perf_stream {
 */
struct intel_uncore *uncore;
 
-   /**
-* @wakeref: As we keep the device awake while the perf stream is
-* active, we track our runtime pm reference for later release.
-*/
-   intel_wakeref_t wakeref;
-
/**
 * @engine: Engine associated with this performance stream.
 */
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [CI 3/9] drm/i915/perf: allow for CS OA configs to be created lazily

2019-10-10 Thread Chris Wilson
From: Lionel Landwerlin 

Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer be
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.

v2: No need for locking on object OA config object creation (Chris)
Flush cpu mapping of OA config (Chris)

v3: Properly deal with the perf_metric lock (Chris/Lionel)

v4: Fix oa config unref/put when not found (Lionel)

v5: Allocate BOs for configurations on the stream instead of globally
(Lionel)

v6: Fix 64bit division (Chris)

v7: Store allocated config BOs into the stream (Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson  (v4)
Signed-off-by: Chris Wilson  (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_perf.c | 107 +++
 drivers/gpu/drm/i915/i915_perf.h |  24 +
 drivers/gpu/drm/i915/i915_perf_types.h   |  23 ++--
 4 files changed, 102 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index b0227ab2fe1b..0987100c786b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,6 +138,7 @@
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
 #define   MI_LRI_CS_MMIO   (1<<19)
 #define   MI_LRI_FORCE_POSTED  (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEMMI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT(1<<22)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0b51ab3ab523..5fa0df46fcc3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -369,52 +369,52 @@ struct perf_open_properties {
struct intel_engine_cs *engine;
 };
 
+struct i915_oa_config_bo {
+   struct llist_node node;
+
+   struct i915_oa_config *oa_config;
+   struct i915_vma *vma;
+};
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
+   struct i915_oa_config *oa_config =
+   container_of(ref, typeof(*oa_config), ref);
+
if (!PTR_ERR(oa_config->flex_regs))
kfree(oa_config->flex_regs);
if (!PTR_ERR(oa_config->b_counter_regs))
kfree(oa_config->b_counter_regs);
if (!PTR_ERR(oa_config->mux_regs))
kfree(oa_config->mux_regs);
-   kfree(oa_config);
-}
-
-static void put_oa_config(struct i915_oa_config *oa_config)
-{
-   if (!atomic_dec_and_test(_config->ref_count))
-   return;
 
-   free_oa_config(oa_config);
+   kfree_rcu(oa_config, rcu);
 }
 
-static int get_oa_config(struct i915_perf *perf,
-int metrics_set,
-struct i915_oa_config **out_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 {
-   int ret;
-
-   if (metrics_set == 1) {
-   *out_config = >test_config;
-   atomic_inc(>test_config.ref_count);
-   return 0;
-   }
-
-   ret = mutex_lock_interruptible(>metrics_lock);
-   if (ret)
-   return ret;
+   struct i915_oa_config *oa_config;
 
-   *out_config = idr_find(>metrics_idr, metrics_set);
-   if (!*out_config)
-   ret = -EINVAL;
+   rcu_read_lock();
+   if (metrics_set == 1)
+   oa_config = >test_config;
else
-   atomic_inc(&(*out_config)->ref_count);
+   oa_config = idr_find(>metrics_idr, metrics_set);
+   if (oa_config)
+   oa_config = i915_oa_config_get(oa_config);
+   rcu_read_unlock();
 
-   mutex_unlock(>metrics_lock);
+   return oa_config;
+}
 
-   return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+   i915_oa_config_put(oa_bo->oa_config);
+   i915_vma_put(oa_bo->vma);
+   kfree(oa_bo);
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1337,6 +1337,16 @@ free_oa_buffer(struct i915_perf_stream *stream)
stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+   struct i915_oa_config_bo *oa_bo, *tmp;
+
+   i915_oa_config_put(stream->oa_confi

[Intel-gfx] [CI 8/9] drm/i915/perf: allow holding preemption on filtered ctx

2019-10-10 Thread Chris Wilson
From: Lionel Landwerlin 

We would like to make use of perf in Vulkan. The Vulkan API is much
lower level than OpenGL, with applications directly exposed to the
concept of command buffers (pretty much equivalent to our batch
buffers). In Vulkan, queries are always limited in scope to a command
buffer. In OpenGL, the lack of command buffer concept meant that
queries' duration could span multiple command buffers.

With that restriction gone in Vulkan, we would like to simplify
measuring performance just by measuring the deltas between the counter
snapshots written by 2 MI_RECORD_PERF_COUNT commands, rather than the
more complex scheme we currently have in the GL driver, using 2
MI_RECORD_PERF_COUNT commands and doing some post processing on the
stream of OA reports, coming from the global OA buffer, to remove any
unrelated deltas in between the 2 MI_RECORD_PERF_COUNT.

Disabling preemption only applies to the single context for which we want
to query performance counters and is considered a privileged
operation, by default protected by CAP_SYS_ADMIN. It is possible to
enable it for a normal user by disabling the paranoid stream setting.

v2: Store preemption setting in intel_context (Chris)

v3: Use priorities to avoid preemption rather than the HW mechanism

v4: Just modify the port priority reporting function

v5: Add nopreempt flag on gem context and always flag requests
appropriately, regardless of OA reconfiguration.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.h   | 18 ++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  3 ++
 drivers/gpu/drm/i915/i915_perf.c  | 34 +--
 drivers/gpu/drm/i915/i915_perf_types.h|  8 +
 include/uapi/drm/i915_drm.h   | 11 ++
 6 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 9234586830d1..cfe80590f0ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -114,6 +114,24 @@ i915_gem_context_clear_user_engines(struct 
i915_gem_context *ctx)
clear_bit(CONTEXT_USER_ENGINES, >flags);
 }
 
+static inline bool
+i915_gem_context_nopreempt(const struct i915_gem_context *ctx)
+{
+   return test_bit(CONTEXT_NOPREEMPT, >flags);
+}
+
+static inline void
+i915_gem_context_set_nopreempt(struct i915_gem_context *ctx)
+{
+   set_bit(CONTEXT_NOPREEMPT, >flags);
+}
+
+static inline void
+i915_gem_context_clear_nopreempt(struct i915_gem_context *ctx)
+{
+   clear_bit(CONTEXT_NOPREEMPT, >flags);
+}
+
 static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 {
return !ctx->file_priv;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index ab8e1367dfc8..fe97b8ba4fda 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -146,6 +146,7 @@ struct i915_gem_context {
 #define CONTEXT_CLOSED 1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION2
 #define CONTEXT_USER_ENGINES   3
+#define CONTEXT_NOPREEMPT  4
 
struct mutex mutex;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 98816c35ffc3..e96901888323 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2077,6 +2077,9 @@ static int eb_submit(struct i915_execbuffer *eb)
if (err)
return err;
 
+   if (i915_gem_context_nopreempt(eb->gem_context))
+   eb->request->flags |= I915_REQUEST_NOPREEMPT;
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 001fb249aaec..980cec647d2d 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -344,6 +344,8 @@ static const struct i915_oa_format 
gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
  * struct perf_open_properties - for validated properties given to open a 
stream
  * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
  * @single_context: Whether a single or all gpu contexts should be monitored
+ * @hold_preemption: Whether the preemption is disabled for the filtered
+ *   context
  * @ctx_handle: A gem ctx handle for use with @single_context
  * @metrics_set: An ID for an OA unit metric set advertised via sysfs
  * @oa_format: An OA unit HW report format
@@ -359,6 +361,7 @@ struct perf_open_properties {
u32 sample_flags;
 
u64 single_context:1;
+   u64 hold_preemption:1;
u64 ctx_handle;
 
/* OA sampling state */
@@ -2512,6 +2515,8 @@ static int i9

[Intel-gfx] [CI 9/9] drm/i915/execlists: Prevent merging requests with conflicting flags

2019-10-10 Thread Chris Wilson
We set out-of-bound parameters inside the i915_requests.flags field,
such as disabling preemption or marking the end-of-context. We should
not coalesce consecutive requests if they have differing instructions
as we only inspect the last active request in a context. Thus if we
allow a later request to be merged into the same execution context, it
will mask any of the earlier flags.

References: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption 
on a request")
Signed-off-by: Chris Wilson 
Cc: Lionel Landwerlin 
Reviewed-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9666d51b7e97..7b43c1852776 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1184,6 +1184,9 @@ static bool can_merge_rq(const struct i915_request *prev,
if (i915_request_completed(next))
return true;
 
+   if (unlikely(prev->flags ^ next->flags) & I915_REQUEST_NOPREEMPT)
+   return false;
+
if (!can_merge_ctx(prev->hw_context, next->hw_context))
return false;
 
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [CI 4/9] drm/i915: add support for perf configuration queries

2019-10-10 Thread Chris Wilson
From: Lionel Landwerlin 

Listing configurations at the moment is supported only through sysfs.
This might cause issues for applications wanting to list
configurations from a container where sysfs isn't available.

This change adds a way to query the number of configurations and their
content through the i915 query uAPI.

v2: Fix sparse warnings (Lionel)
Add support to query configuration using uuid (Lionel)

v3: Fix some inconsistency in uapi header (Lionel)
Fix unlocking when not locked issue (Lionel)
Add debug messages (Lionel)

v4: Fix missing unlock (Dan)

v5: Drop lock when copying config content to userspace (Chris)

v6: Drop lock when copying config list to userspace (Chris)
Fix deadlock when calling i915_perf_get_oa_config() under
perf.metrics_lock (Lionel)
Add i915_oa_config_get() (Chris)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_perf.c  |   3 +-
 drivers/gpu/drm/i915/i915_query.c | 295 ++
 include/uapi/drm/i915_drm.h   |  62 ++-
 3 files changed, 357 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5fa0df46fcc3..7d7baee7febe 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3473,8 +3473,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, 
void *data,
 
GEM_BUG_ON(*arg != oa_config->id);
 
-   sysfs_remove_group(perf->metrics_kobj,
-  _config->sysfs_metric);
+   sysfs_remove_group(perf->metrics_kobj, _config->sysfs_metric);
 
idr_remove(>metrics_idr, *arg);
 
diff --git a/drivers/gpu/drm/i915/i915_query.c 
b/drivers/gpu/drm/i915/i915_query.c
index abac5042da2b..6a68ecc7bb5f 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -7,6 +7,7 @@
 #include 
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 #include "i915_query.h"
 #include 
 
@@ -140,10 +141,304 @@ query_engine_info(struct drm_i915_private *i915,
return len;
 }
 
+static int can_copy_perf_config_registers_or_number(u32 user_n_regs,
+   u64 user_regs_ptr,
+   u32 kernel_n_regs)
+{
+   /*
+* We'll just put the number of registers, and won't copy the
+* register.
+*/
+   if (user_n_regs == 0)
+   return 0;
+
+   if (user_n_regs < kernel_n_regs)
+   return -EINVAL;
+
+   if (!access_ok(u64_to_user_ptr(user_regs_ptr),
+  2 * sizeof(u32) * kernel_n_regs))
+   return -EFAULT;
+
+   return 0;
+}
+
+static int copy_perf_config_registers_or_number(const struct i915_oa_reg 
*kernel_regs,
+   u32 kernel_n_regs,
+   u64 user_regs_ptr,
+   u32 *user_n_regs)
+{
+   u32 r;
+
+   if (*user_n_regs == 0) {
+   *user_n_regs = kernel_n_regs;
+   return 0;
+   }
+
+   *user_n_regs = kernel_n_regs;
+
+   for (r = 0; r < kernel_n_regs; r++) {
+   u32 __user *user_reg_ptr =
+   u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2);
+   u32 __user *user_val_ptr =
+   u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 +
+   sizeof(u32));
+   int ret;
+
+   ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr),
+user_reg_ptr);
+   if (ret)
+   return -EFAULT;
+
+   ret = __put_user(kernel_regs[r].value, user_val_ptr);
+   if (ret)
+   return -EFAULT;
+   }
+
+   return 0;
+}
+
+static int query_perf_config_data(struct drm_i915_private *i915,
+ struct drm_i915_query_item *query_item,
+ bool use_uuid)
+{
+   struct drm_i915_query_perf_config __user *user_query_config_ptr =
+   u64_to_user_ptr(query_item->data_ptr);
+   struct drm_i915_perf_oa_config __user *user_config_ptr =
+   u64_to_user_ptr(query_item->data_ptr +
+   sizeof(struct drm_i915_query_perf_config));
+   struct drm_i915_perf_oa_config user_config;
+   struct i915_perf *perf = >perf;
+   struct i915_oa_config *oa_config;
+   char uuid[UUID_STRING_LEN + 1];
+   u64 config_id;
+   u32 flags, total_size;
+   int ret;
+
+   if (!perf->i915)
+   return -ENODEV;
+
+   total_size =
+   sizeof(struct drm_i915_query_perf_config) +
+   sizeof(struct drm_i915_perf_oa_config);
+
+   

[Intel-gfx] [CI 5/9] drm/i915/perf: implement active wait for noa configurations

2019-10-10 Thread Chris Wilson
From: Lionel Landwerlin 

NOA configurations take some amount of time to apply. That amount of
time depends on the size of the GT. There is no documented time for
this. For example, past experimentations with powergating
configuration changes seem to indicate a 60~70us delay. We go with
500us as default for now which should be over the required amount of
time (according to HW architects).

v2: Don't forget to save/restore registers used for the wait (Chris)

v3: Name used CS_GPR registers (Chris)
Fix compile issue due to rebase (Lionel)

v4: Fix save/restore helpers (Umesh)

v5: Move noa_wait from drm_i915_private to i915_perf_stream (Lionel)

v6: Add missing struct declarations in i915_perf.h

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson  (v4)
Signed-off-by: Chris Wilson  (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |   5 +
 drivers/gpu/drm/i915/i915_debugfs.c   |  32 +++
 drivers/gpu/drm/i915/i915_perf.c  | 224 ++
 drivers/gpu/drm/i915/i915_perf_types.h|   8 +
 drivers/gpu/drm/i915/i915_reg.h   |   4 +-
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_perf.c| 216 +
 8 files changed, 492 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_perf.c

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 0987100c786b..8e63cffcabe0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -163,7 +163,8 @@
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
-#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define   MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/
 
 /*
  * 3D instructions used by the kernel
@@ -224,6 +225,7 @@
 #define   PIPE_CONTROL_CS_STALL(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE  (1<<18)
 #define   PIPE_CONTROL_MEDIA_STATE_CLEAR   (1<<16)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
 #define   PIPE_CONTROL_QW_WRITE(1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK(3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL (1<<13)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 802f516a3430..be4b263621c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -109,6 +109,11 @@ enum intel_gt_scratch_field {
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
 
+   /* 6 * 8 bytes */
+   INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+   /* 4 bytes */
+   INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 277f31297f29..d463a28b7475 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3590,6 +3590,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
i915_wedged_get, i915_wedged_set,
"%llu\n");
 
+static int
+i915_perf_noa_delay_set(void *data, u64 val)
+{
+   struct drm_i915_private *i915 = data;
+   const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+
+   /*
+* This would lead to infinite waits as we're doing timestamp
+* difference on the CS with only 32bits.
+*/
+   if (val > mul_u32_u32(U32_MAX, clk))
+   return -EINVAL;
+
+   atomic64_set(>perf.noa_programming_delay, val);
+   return 0;
+}
+
+static int
+i915_perf_noa_delay_get(void *data, u64 *val)
+{
+   struct drm_i915_private *i915 = data;
+
+   *val = atomic64_read(>perf.noa_programming_delay);
+   return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
+   i915_perf_noa_delay_get,
+   i915_perf_noa_delay_set,
+   "%llu\n");
+
 #define DROP_UNBOUND   BIT(0)
 #define DROP_BOUND BIT(1)
 #define DROP_RETIREBIT(2)
@@ -4340,6 +4371,7 @@ static const struct i915_debugfs_files {
const char *name;
const struct file_operations *fops;
 } i915_debugfs_files[] = {
+   {"i915_perf_noa_delay", _perf_noa_delay_fops},
{"i915_wedged", _wedged_fops},
{"i915_cache_sharing", _cache_sharing_fops},
{"i915_gem_drop_caches", _drop_caches_fops},
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/driver

[Intel-gfx] [CI 6/9] drm/i915/perf: execute OA configuration from command stream

2019-10-10 Thread Chris Wilson
From: Lionel Landwerlin 

We haven't run into issues with programming the global OA/NOA
registers configuration from CPU so far, but HW engineers actually
recommend doing this from the command streamer. On TGL in particular
one of the clock domain in which some of that programming goes might
not be powered when we poke things from the CPU.

Since we have a command buffer prepared for the execbuffer side of
things, we can reuse that approach here too.

This also allows us to significantly reduce the amount of time we hold
the main lock.

v2: Drop the global lock as much as possible

v3: Take global lock to pin global

v4: Create i915 request in emit_oa_config() to avoid deadlocks (Lionel)

v5: Move locking to the stream (Lionel)

v6: Move active reconfiguration request into i915_perf_stream (Lionel)

v7: Pin VMA outside request creation (Chris)
Lock VMA before move to active (Chris)

v8: Fix double free on stream->initial_oa_config_bo (Lionel)
Don't allow interruption when waiting on active config request
(Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_perf.c | 199 ---
 1 file changed, 156 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index caa4ab68cea5..c37fe275cf33 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1731,56 +1731,181 @@ static int alloc_noa_wait(struct i915_perf_stream 
*stream)
return 0;
 
 err_unpin:
-   __i915_vma_unpin(vma);
+   i915_vma_unpin_and_release(, 0);
 err_unref:
i915_gem_object_put(bo);
return ret;
 }
 
-static void config_oa_regs(struct intel_uncore *uncore,
-  const struct i915_oa_reg *regs,
-  u32 n_regs)
+static u32 *write_cs_mi_lri(u32 *cs,
+   const struct i915_oa_reg *reg_data,
+   u32 n_regs)
 {
u32 i;
 
for (i = 0; i < n_regs; i++) {
-   const struct i915_oa_reg *reg = regs + i;
+   if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+   u32 n_lri = min_t(u32,
+ n_regs - i,
+ MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+   *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+   }
+   *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+   *cs++ = reg_data[i].value;
+   }
+
+   return cs;
+}
+
+static int num_lri_dwords(int num_regs)
+{
+   int count = 0;
+
+   if (num_regs > 0) {
+   count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
+   count += num_regs * 2;
+   }
+
+   return count;
+}
+
+static struct i915_oa_config_bo *
+alloc_oa_config_buffer(struct i915_perf_stream *stream,
+  struct i915_oa_config *oa_config)
+{
+   struct drm_i915_gem_object *obj;
+   struct i915_oa_config_bo *oa_bo;
+   size_t config_length = 0;
+   u32 *cs;
+   int err;
+
+   oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+   if (!oa_bo)
+   return ERR_PTR(-ENOMEM);
+
+   config_length += num_lri_dwords(oa_config->mux_regs_len);
+   config_length += num_lri_dwords(oa_config->b_counter_regs_len);
+   config_length += num_lri_dwords(oa_config->flex_regs_len);
+   config_length++; /* MI_BATCH_BUFFER_END */
+   config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
+
+   obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
+   if (IS_ERR(obj)) {
+   err = PTR_ERR(obj);
+   goto err_free;
+   }
+
+   cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   goto err_oa_bo;
+   }
 
-   intel_uncore_write(uncore, reg->addr, reg->value);
+   cs = write_cs_mi_lri(cs,
+oa_config->mux_regs,
+oa_config->mux_regs_len);
+   cs = write_cs_mi_lri(cs,
+oa_config->b_counter_regs,
+oa_config->b_counter_regs_len);
+   cs = write_cs_mi_lri(cs,
+oa_config->flex_regs,
+oa_config->flex_regs_len);
+
+   *cs++ = MI_BATCH_BUFFER_END;
+
+   i915_gem_object_flush_map(obj);
+   i915_gem_object_unpin_map(obj);
+
+   oa_bo->vma = i915_vma_instance(obj,
+  >engine->gt->ggtt->vm,
+  NULL);
+   if (IS_ERR(oa_bo->vma)) {
+   err = PTR_ERR(oa_bo->vma);
+   goto err_oa_bo;
}
+
+   oa_bo->oa_config = i915_oa_config_get(oa_config);
+   ll

[Intel-gfx] [CI 2/9] drm/i915/perf: introduce a versioning of the i915-perf uapi

2019-10-10 Thread Chris Wilson
From: Lionel Landwerlin 

Reporting this version will help applications figure out what level of
support the running kernel provides.

v2: Add i915_perf_ioctl_version() (Chris)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_getparam.c |  4 
 drivers/gpu/drm/i915/i915_perf.c | 10 ++
 drivers/gpu/drm/i915/i915_perf.h |  1 +
 include/uapi/drm/i915_drm.h  | 21 +
 4 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_getparam.c 
b/drivers/gpu/drm/i915/i915_getparam.c
index f4b3cbb1adce..ad33fbe90a28 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -5,6 +5,7 @@
 #include "gt/intel_engine_user.h"
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 
 int i915_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
@@ -156,6 +157,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_MMAP_GTT_COHERENT:
value = INTEL_INFO(i915)->has_coherent_ggtt;
break;
+   case I915_PARAM_PERF_REVISION:
+   value = i915_perf_ioctl_version();
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c4a436dfb7db..0b51ab3ab523 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3665,3 +3665,13 @@ void i915_perf_fini(struct drm_i915_private *i915)
memset(>ops, 0, sizeof(perf->ops));
perf->i915 = NULL;
 }
+
+/**
+ * i915_perf_ioctl_version - Version of the i915-perf subsystem
+ *
+ * This version number is used by userspace to detect available features.
+ */
+int i915_perf_ioctl_version(void)
+{
+   return 1;
+}
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index ff412fb0dbbf..295e33e8eef7 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -20,6 +20,7 @@ void i915_perf_init(struct drm_i915_private *i915);
 void i915_perf_fini(struct drm_i915_private *i915);
 void i915_perf_register(struct drm_i915_private *i915);
 void i915_perf_unregister(struct drm_i915_private *i915);
+int i915_perf_ioctl_version(void);
 
 int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 30c542144016..c50c712b3771 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -611,6 +611,13 @@ typedef struct drm_i915_irq_wait {
  * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
  */
 #define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
+
+/*
+ * Revision of the i915-perf uAPI. The value returned helps determine what
+ * i915-perf features are available. See drm_i915_perf_property_id.
+ */
+#define I915_PARAM_PERF_REVISION   54
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1844,23 +1851,31 @@ enum drm_i915_perf_property_id {
 * Open the stream for a specific context handle (as used with
 * execbuffer2). A stream opened for a specific context this way
 * won't typically require root privileges.
+*
+* This property is available in perf revision 1.
 */
DRM_I915_PERF_PROP_CTX_HANDLE = 1,
 
/**
 * A value of 1 requests the inclusion of raw OA unit reports as
 * part of stream samples.
+*
+* This property is available in perf revision 1.
 */
DRM_I915_PERF_PROP_SAMPLE_OA,
 
/**
 * The value specifies which set of OA unit metrics should be
 * be configured, defining the contents of any OA unit reports.
+*
+* This property is available in perf revision 1.
 */
DRM_I915_PERF_PROP_OA_METRICS_SET,
 
/**
 * The value specifies the size and layout of OA unit reports.
+*
+* This property is available in perf revision 1.
 */
DRM_I915_PERF_PROP_OA_FORMAT,
 
@@ -1870,6 +1885,8 @@ enum drm_i915_perf_property_id {
 * from this exponent as follows:
 *
 *   80ns * 2^(period_exponent + 1)
+*
+* This property is available in perf revision 1.
 */
DRM_I915_PERF_PROP_OA_EXPONENT,
 
@@ -1901,6 +1918,8 @@ struct drm_i915_perf_open_param {
  * to close and re-open a stream with the same configuration.
  *
  * It's undefined whether any pending data for the stream will be lost.
+ *
+ * This ioctl is available in perf revision 1.
  */
 #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0)
 
@@ -1908,6 +1927,8 @@ struct drm_i915_perf_open_param {
  * Disable data captu

Re: [Intel-gfx] [PATCH] drm/i915: Don't disable interrupts independently of the lock

2019-10-10 Thread Chris Wilson
Quoting Sebastian Andrzej Siewior (2019-10-10 17:06:40)
> The locks (active.lock and rq->lock) need to be taken with disabled
> interrupts. This is done in i915_request_retire() by disabling the
> interrupts independently of the locks itself.
> While local_irq_disable()+spin_lock() equals spin_lock_irq() on vanilla
> it does not on PREEMPT_RT. Also, it is not obvious if there is a special 
> reason
> to why the interrupts are disabled independently of the lock.
> 
> Enable/disable interrupts as part of the locking instruction.
> 
> Signed-off-by: Sebastian Andrzej Siewior 
> ---
>  drivers/gpu/drm/i915/i915_request.c |8 ++--
>  1 file changed, 2 insertions(+), 6 deletions(-)
> 
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -251,15 +251,13 @@ static bool i915_request_retire(struct i
> active->retire(active, rq);
> }
>  
> -   local_irq_disable();
> -
> /*
>  * We only loosely track inflight requests across preemption,
>  * and so we may find ourselves attempting to retire a _completed_
>  * request that we have removed from the HW and put back on a run
>  * queue.
>  */
> -   spin_lock(>engine->active.lock);
> +   spin_lock_irq(>engine->active.lock);
> list_del(>sched.link);
> spin_unlock(>engine->active.lock);
>  
> @@ -278,9 +276,7 @@ static bool i915_request_retire(struct i
> __notify_execute_cb(rq);
> }
> GEM_BUG_ON(!list_empty(>execute_cb));
> -   spin_unlock(>lock);
> -
> -   local_irq_enable();
> +   spin_unlock_irq(>lock);

Nothing screams about the imbalance? irq off from one lock to the other?
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 2/2] drm/i915/selftests: Check that GPR are cleared for new contexts

2019-10-10 Thread Chris Wilson
We want the general purpose registers to be clear in all new contexts so
that we can be confident that no information is leaked from one to the
next.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 185 ++---
 1 file changed, 166 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 0aa36b1b2389..1276da059dc6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -19,6 +19,9 @@
 #include "gem/selftests/igt_gem_utils.h"
 #include "gem/selftests/mock_context.h"
 
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+
 static struct i915_vma *create_scratch(struct intel_gt *gt)
 {
struct drm_i915_gem_object *obj;
@@ -2107,16 +2110,14 @@ static int preserved_virtual_engine(struct 
drm_i915_private *i915,
struct intel_engine_cs **siblings,
unsigned int nsibling)
 {
-#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
-
struct i915_request *last = NULL;
struct i915_gem_context *ctx;
struct intel_context *ve;
struct i915_vma *scratch;
struct igt_live_test t;
-   const int num_gpr = 16 * 2; /* each GPR is 2 dwords */
unsigned int n;
int err = 0;
+   u32 *cs;
 
ctx = kernel_context(i915);
if (!ctx)
@@ -2142,10 +2143,9 @@ static int preserved_virtual_engine(struct 
drm_i915_private *i915,
if (err)
goto out_unpin;
 
-   for (n = 0; n < num_gpr; n++) {
+   for (n = 0; n < NUM_GPR_DW; n++) {
struct intel_engine_cs *engine = siblings[n % nsibling];
struct i915_request *rq;
-   u32 *cs;
 
rq = i915_request_create(ve);
if (IS_ERR(rq)) {
@@ -2169,7 +2169,7 @@ static int preserved_virtual_engine(struct 
drm_i915_private *i915,
*cs++ = 0;
 
*cs++ = MI_LOAD_REGISTER_IMM(1);
-   *cs++ = CS_GPR(engine, (n + 1) % num_gpr);
+   *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
*cs++ = n + 1;
 
*cs++ = MI_NOOP;
@@ -2182,21 +2182,26 @@ static int preserved_virtual_engine(struct 
drm_i915_private *i915,
 
if (i915_request_wait(last, 0, HZ / 5) < 0) {
err = -ETIME;
-   } else {
-   u32 *map = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+   goto out_end;
+   }
 
-   for (n = 0; n < num_gpr; n++) {
-   if (map[n] != n) {
-   pr_err("Incorrect value[%d] found for 
GPR[%d]\n",
-  map[n], n);
-   err = -EINVAL;
-   break;
-   }
-   }
+   cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   goto out_end;
+   }
 
-   i915_gem_object_unpin_map(scratch->obj);
+   for (n = 0; n < NUM_GPR_DW; n++) {
+   if (cs[n] != n) {
+   pr_err("Incorrect value[%d] found for GPR[%d]\n",
+  cs[n], n);
+   err = -EINVAL;
+   break;
+   }
}
 
+   i915_gem_object_unpin_map(scratch->obj);
+
 out_end:
if (igt_live_test_end())
err = -EIO;
@@ -2210,8 +2215,6 @@ static int preserved_virtual_engine(struct 
drm_i915_private *i915,
 out_close:
kernel_context_close(ctx);
return err;
-
-#undef CS_GPR
 }
 
 static int live_virtual_preserved(void *arg)
@@ -2724,11 +2727,155 @@ static int live_lrc_state(void *arg)
return err;
 }
 
+static int gpr_make_dirty(struct intel_engine_cs *engine)
+{
+   struct i915_request *rq;
+   u32 *cs;
+   int n;
+
+   rq = i915_request_create(engine->kernel_context);
+   if (IS_ERR(rq))
+   return PTR_ERR(rq);
+
+   cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
+   if (IS_ERR(cs)) {
+   i915_request_add(rq);
+   return PTR_ERR(cs);
+   }
+
+   *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
+   for (n = 0; n < NUM_GPR_DW; n++) {
+   *cs++ = CS_GPR(engine, n);
+   *cs++ = STACK_MAGIC;
+   }
+   *cs++ = MI_NOOP;
+
+   intel_ring_advance(rq, cs);
+   i915_request_add(rq);
+
+   return 0;
+}
+
+static int __live_gpr_clear(struct i915_gem_context *fixme,
+   struct intel_engine_cs *engine,
+   struct i915_vma *scratch)
+{
+   struct intel_context *ce;
+

[Intel-gfx] [PATCH 1/2] drm/i915/selftests: Check known register values within the context

2019-10-10 Thread Chris Wilson
Check the logical ring context by asserting that the registers hold
expected start during execution. (It's a bit chicken-and-egg for how
could we manage to execute our request if the registers were not being
updated. Still, it's nice to verify that the HW is working as expected.)

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 126 +
 1 file changed, 126 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index a691e429ca01..0aa36b1b2389 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -2599,10 +2599,136 @@ static int live_lrc_layout(void *arg)
return err;
 }
 
+static int __live_lrc_state(struct i915_gem_context *fixme,
+   struct intel_engine_cs *engine,
+   struct i915_vma *scratch)
+{
+   struct intel_context *ce;
+   struct i915_request *rq;
+   enum {
+   RING_START_IDX = 0,
+   RING_TAIL_IDX,
+   MAX_IDX
+   };
+   u32 expected[MAX_IDX];
+   u32 *cs;
+   int err;
+   int n;
+
+   ce = intel_context_create(fixme, engine);
+   if (IS_ERR(ce))
+   return PTR_ERR(ce);
+
+   err = intel_context_pin(ce);
+   if (err)
+   goto err_put;
+
+   rq = i915_request_create(ce);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto err_unpin;
+   }
+
+   cs = intel_ring_begin(rq, 4 * MAX_IDX);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   i915_request_add(rq);
+   goto err_unpin;
+   }
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
+   *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
+   *cs++ = 0;
+
+   expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
+   *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
+   *cs++ = 0;
+
+   i915_request_get(rq);
+   i915_request_add(rq);
+
+   intel_engine_flush_submission(engine);
+   expected[RING_TAIL_IDX] = ce->ring->tail;
+
+   if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+   err = -ETIME;
+   goto err_rq;
+   }
+
+   cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   goto err_rq;
+   }
+
+   for (n = 0; n < MAX_IDX; n++) {
+   if (cs[n] != expected[n]) {
+   pr_err("%s: Stored register[%d] value[0x%x] did not 
match expected[0x%x]\n",
+  engine->name, n, cs[n], expected[n]);
+   err = -EINVAL;
+   break;
+   }
+   }
+
+   i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+   i915_request_put(rq);
+err_unpin:
+   intel_context_unpin(ce);
+err_put:
+   intel_context_put(ce);
+   return err;
+}
+
+static int live_lrc_state(void *arg)
+{
+   struct intel_gt *gt = arg;
+   struct intel_engine_cs *engine;
+   struct i915_gem_context *fixme;
+   struct i915_vma *scratch;
+   enum intel_engine_id id;
+   int err = 0;
+
+   /*
+* Check the live register state matches what we expect for this
+* intel_context.
+*/
+
+   fixme = kernel_context(gt->i915);
+   if (!fixme)
+   return -ENOMEM;
+
+   scratch = create_scratch(gt);
+   if (IS_ERR(scratch)) {
+   err = PTR_ERR(scratch);
+   goto out_close;
+   }
+
+   for_each_engine(engine, gt->i915, id) {
+   err = __live_lrc_state(fixme, engine, scratch);
+   if (err)
+   break;
+   }
+
+   if (igt_flush_test(gt->i915))
+   err = -EIO;
+
+   i915_vma_unpin_and_release(, 0);
+out_close:
+   kernel_context_close(fixme);
+   return err;
+}
+
 int intel_lrc_live_selftests(struct drm_i915_private *i915)
 {
static const struct i915_subtest tests[] = {
SUBTEST(live_lrc_layout),
+   SUBTEST(live_lrc_state),
};
 
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 7/9] drm/i915/perf: Allow dynamic reconfiguration of the OA stream

2019-10-10 Thread Chris Wilson
Quoting Lionel Landwerlin (2019-10-10 16:22:25)
> On 10/10/2019 00:19, Chris Wilson wrote:
> > From: Lionel Landwerlin 
> >
> > Introduce a new perf_ioctl command to change the OA configuration of the
> > active stream. This allows the OA stream to be reconfigured between
> > batch buffers, giving greater flexibility in sampling. We inject a
> > request into the OA context to reconfigure the stream asynchronously on
> > the GPU in between and ordered with execbuffer calls.
> >
> > Signed-off-by: Lionel Landwerlin 
> 
> 
> So much simpler :)

Indeed, it all came together into a much more coherent story.

> > ---
> >   /**
> >* i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
> >* @stream: An i915 perf stream
> > @@ -2879,6 +2901,8 @@ static long i915_perf_ioctl_locked(struct 
> > i915_perf_stream *stream,
> >   case I915_PERF_IOCTL_DISABLE:
> >   i915_perf_disable_locked(stream);
> >   return 0;
> > + case I915_PERF_IOCTL_CONFIG:
> > + return i915_perf_config_locked(stream, arg);
> 
> For REMOVE_CONFIG we passed a pointer to an u64, not sure whether we 
> should reuse the same pattern here?

Aiui, the user creates oa-config handles, and/or queries them. If we are
simpler talking handles that fit inside unsigned long (so assume u32)
then I don't see the harm in passing an id rather than a pointer.

The alternative is this takes an uuid string? Or you want to always use
u64 handles?

I guess you will have a better idea what works better after playing
around with userspace.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [CI 1/2] drm/i915/perf: store the associated engine of a stream

2019-10-10 Thread Chris Wilson
From: Lionel Landwerlin 

We'll use this information later to verify that a client trying to
reconfigure the stream does so on the right engine. For now, we want to
pull the knowledge of which engine we use into a central property.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_perf.c   | 30 ++
 drivers/gpu/drm/i915/i915_perf_types.h |  5 +
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5a34cad7d824..1a5c6591b9bb 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -197,6 +197,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_user.h"
 #include "gt/intel_lrc_reg.h"
 
 #include "i915_drv.h"
@@ -347,6 +348,7 @@ static const struct i915_oa_format 
gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
  * @oa_format: An OA unit HW report format
  * @oa_periodic: Whether to enable periodic OA unit sampling
  * @oa_period_exponent: The OA unit sampling period is derived from this
+ * @engine: The engine (typically rcs0) being monitored by the OA unit
  *
  * As read_properties_unlocked() enumerates and validates the properties given
  * to open a stream of metrics the configuration is built up in the structure
@@ -363,6 +365,8 @@ struct perf_open_properties {
int oa_format;
bool oa_periodic;
int oa_period_exponent;
+
+   struct intel_engine_cs *engine;
 };
 
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
@@ -1205,7 +1209,7 @@ static struct intel_context *oa_pin_context(struct 
i915_perf_stream *stream)
int err;
 
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
-   if (ce->engine->class != RENDER_CLASS)
+   if (ce->engine != stream->engine) /* first match! */
continue;
 
/*
@@ -2127,7 +2131,13 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
int format_size;
int ret;
 
-   /* If the sysfs metrics/ directory wasn't registered for some
+   if (!props->engine) {
+   DRM_DEBUG("OA engine not specified\n");
+   return -EINVAL;
+   }
+
+   /*
+* If the sysfs metrics/ directory wasn't registered for some
 * reason then don't let userspace try their luck with config
 * IDs
 */
@@ -2146,7 +2156,8 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
return -ENODEV;
}
 
-   /* To avoid the complexity of having to accurately filter
+   /*
+* To avoid the complexity of having to accurately filter
 * counter reports and marshal to the appropriate client
 * we currently only allow exclusive access
 */
@@ -2160,6 +2171,9 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
return -EINVAL;
}
 
+   stream->engine = props->engine;
+   stream->gt = stream->engine->gt;
+
stream->sample_size = sizeof(struct drm_i915_perf_record_header);
 
format_size = perf->oa_formats[props->oa_format].size;
@@ -2711,7 +2725,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
}
 
stream->perf = perf;
-   stream->gt = >i915->gt;
stream->ctx = specific_ctx;
 
ret = i915_oa_stream_init(stream, param, props);
@@ -2796,6 +2809,15 @@ static int read_properties_unlocked(struct i915_perf 
*perf,
return -EINVAL;
}
 
+   /* At the moment we only support using i915-perf on the RCS. */
+   props->engine = intel_engine_lookup_user(perf->i915,
+I915_ENGINE_CLASS_RENDER,
+0);
+   if (!props->engine) {
+   DRM_DEBUG("No RENDER-capable engines\n");
+   return -EINVAL;
+   }
+
/* Considering that ID = 0 is reserved and assuming that we don't
 * (currently) expect any configurations to ever specify duplicate
 * values for a particular property ID then the last _PROP_MAX value is
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h 
b/drivers/gpu/drm/i915/i915_perf_types.h
index 2d17059d32ee..82cd3b295037 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -140,6 +140,11 @@ struct i915_perf_stream {
 */
intel_wakeref_t wakeref;
 
+   /**
+* @engine: Engine associated with this performance stream.
+*/
+   struct intel_engine_cs *engine;
+
/**
 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
 * properties given when opening a stream, representing 

[Intel-gfx] [CI 2/2] drm/i915/perf: Store shortcut to intel_uncore

2019-10-10 Thread Chris Wilson
Now that we have the engine stored in i915_perf, we have a means of
accessing intel_gt should we require it. However, we are currently only
using the intel_gt to find the right intel_uncore, so replace our
i915_perf.gt pointer with the more useful i915_perf.uncore.

Signed-off-by: Chris Wilson 
Cc: Lionel Landwerlin 
Reviewed-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c   | 48 +-
 drivers/gpu/drm/i915/i915_perf_types.h |  4 +--
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1a5c6591b9bb..77c3cef64548 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -419,14 +419,14 @@ static int get_oa_config(struct i915_perf *perf,
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
 
return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
 }
 
 static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
 
return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
@@ -656,7 +656,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
  size_t count,
  size_t *offset)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
int report_size = stream->oa_buffer.format_size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
@@ -866,7 +866,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
size_t count,
size_t *offset)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
u32 oastatus;
int ret;
 
@@ -945,7 +945,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream 
*stream,
  size_t count,
  size_t *offset)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
int report_size = stream->oa_buffer.format_size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
@@ -1077,7 +1077,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
size_t count,
size_t *offset)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
u32 oastatus1;
int ret;
 
@@ -1352,8 +1352,8 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
 
free_oa_buffer(stream);
 
-   intel_uncore_forcewake_put(stream->gt->uncore, FORCEWAKE_ALL);
-   intel_runtime_pm_put(stream->gt->uncore->rpm, stream->wakeref);
+   intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
+   intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref);
 
if (stream->ctx)
oa_put_render_ctx_id(stream);
@@ -1368,7 +1368,7 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
 
 static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
unsigned long flags;
 
@@ -1416,7 +1416,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream 
*stream)
 
 static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
unsigned long flags;
 
@@ -1565,7 +1565,7 @@ static void delay_after_mux(void)
 
 static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
const struct i915_oa_config *oa_config = stream->oa_config;
 
/*
@@ -1594,7 +1594,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream 
*stream)
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
 
intel_uncore_rmw(uncore, GEN6_UCGCTL1,
   

Re: [Intel-gfx] [PATCH 1/2] drm/i915/perf: store the associated engine of a stream

2019-10-10 Thread Chris Wilson
Quoting Lionel Landwerlin (2019-10-10 15:57:32)
> On 10/10/2019 10:27, Chris Wilson wrote:
> > From: Lionel Landwerlin 
> >
> > We'll use this information later to verify that a client trying to
> > reconfigure the stream does so on the right engine. For now, we want to
> > pull the knowledge of which engine we use into a central property.
> >
> > Signed-off-by: Lionel Landwerlin 
> 
> 
> Your changes look fine :
> 
> Reviewed-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson 

And the queue gradually shrinks.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] RFC drm/i915: Allow userspace to specify ringsize on construction

2019-10-10 Thread Chris Wilson
No good reason why we must always use a static ringsize, so let
userspace select one during construction.

Signed-off-by: Chris Wilson 
Cc: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 83 +++--
 include/uapi/drm/i915_drm.h | 12 +++
 2 files changed, 89 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 46e5b3b53288..9635e377c8ae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -455,23 +455,30 @@ __create_context(struct drm_i915_private *i915)
return ERR_PTR(err);
 }
 
-static void
+static int
 context_apply_all(struct i915_gem_context *ctx,
- void (*fn)(struct intel_context *ce, void *data),
+ int (*fn)(struct intel_context *ce, void *data),
  void *data)
 {
struct i915_gem_engines_iter it;
struct intel_context *ce;
+   int err = 0;
 
-   for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
-   fn(ce, data);
+   for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+   err = fn(ce, data);
+   if (err)
+   break;
+   }
i915_gem_context_unlock_engines(ctx);
+
+   return err;
 }
 
-static void __apply_ppgtt(struct intel_context *ce, void *vm)
+static int __apply_ppgtt(struct intel_context *ce, void *vm)
 {
i915_vm_put(ce->vm);
ce->vm = i915_vm_get(vm);
+   return 0;
 }
 
 static struct i915_address_space *
@@ -509,9 +516,10 @@ static void __set_timeline(struct intel_timeline **dst,
intel_timeline_put(old);
 }
 
-static void __apply_timeline(struct intel_context *ce, void *timeline)
+static int __apply_timeline(struct intel_context *ce, void *timeline)
 {
__set_timeline(>timeline, timeline);
+   return 0;
 }
 
 static void __assign_timeline(struct i915_gem_context *ctx,
@@ -1086,6 +1094,65 @@ static int set_ppgtt(struct drm_i915_file_private 
*file_priv,
return err;
 }
 
+static int __apply_ringsize(struct intel_context *ce, void *sz)
+{
+   int err = 0;
+
+   if (intel_context_lock_pinned(ce))
+   return -EINTR;
+
+   if (intel_context_is_pinned(ce)) {
+   err = -EBUSY; /* In active use! Come back later! */
+   goto unlock;
+   }
+
+   if (test_bit(CONTEXT_ALLOC_BIT, >flags)) {
+   struct intel_ring *ring;
+
+   /* Replace the existing ringbuffer */
+   ring = intel_engine_create_ring(ce->engine,
+   (unsigned long)sz);
+   if (IS_ERR(ring)) {
+   err = PTR_ERR(ring);
+   goto unlock;
+   }
+
+   intel_ring_put(ce->ring);
+   ce->ring = ring;
+
+   /* Context image will be updated on next pin */
+   } else {
+   ce->ring = sz;
+   }
+
+unlock:
+   intel_context_unlock_pinned(ce);
+   return err;
+}
+
+static int set_ringsize(struct i915_gem_context *ctx,
+   struct drm_i915_gem_context_param *args)
+{
+   if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
+   return -ENODEV;
+
+   if (args->size)
+   return -EINVAL;
+
+   if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE))
+   return -EINVAL;
+
+   if (args->value < I915_GTT_PAGE_SIZE)
+   return -EINVAL;
+
+   if (args->value > 128 * I915_GTT_PAGE_SIZE)
+   return -EINVAL;
+
+   return context_apply_all(ctx,
+__apply_ringsize,
+__intel_context_ring_size(args->value));
+}
+
 static int gen8_emit_rpcs_config(struct i915_request *rq,
 struct intel_context *ce,
 struct intel_sseu sseu)
@@ -1798,6 +1865,10 @@ static int ctx_setparam(struct drm_i915_file_private 
*fpriv,
ret = set_persistence(ctx, args);
break;
 
+   case I915_CONTEXT_PARAM_RINGSIZE:
+   ret = set_ringsize(ctx, args);
+   break;
+
case I915_CONTEXT_PARAM_BAN_PERIOD:
default:
ret = -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index eb9e704d717a..e375cd2cf66b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1580,6 +1580,18 @@ struct drm_i915_gem_context_param {
  * By default, new contexts allow persistence.
  */
 #define I915_CONTEXT_PARAM_PERSISTENCE 0xb
+
+/*
+ *
+ * I915_CONTEXT_PARAM_RINGSIZE:
+ *
+ * Sets the size of the ringbuffer to use for logical ring contexts.
+ * Only possible to be set prior to first use, i.e. during construction.
+ * Only applies to the current set of en

Re: [Intel-gfx] [PATCH] drm/i915: Honour O_NONBLOCK before throttling execbuf submissions

2019-10-10 Thread Chris Wilson
Quoting Chris Wilson (2019-10-10 14:48:49)
> Check the user's flags on the struct file before deciding whether or not
> to stall before submitting a request. This allows us to reasonably
> cheaply honour O_NONBLOCK without checking at more critical phases
> during request submission.

One might reasonably expect poll(POLLOUT) to be supported as well in
this case :|

Bring on ugpu.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915: Honour O_NONBLOCK before throttling execbuf submissions

2019-10-10 Thread Chris Wilson
Check the user's flags on the struct file before deciding whether or not
to stall before submitting a request. This allows us to reasonably
cheaply honour O_NONBLOCK without checking at more critical phases
during request submission.

Suggested-by: Joonas Lahtinen 
Signed-off-by: Chris Wilson 
Cc: Joonas Lahtinen 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 21 ---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 98816c35ffc3..bc6bcb8f6d79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2189,15 +2189,22 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, 
struct intel_context *ce)
intel_context_timeline_unlock(tl);
 
if (rq) {
-   if (i915_request_wait(rq,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT) < 0) {
-   i915_request_put(rq);
-   err = -EINTR;
-   goto err_exit;
-   }
+   bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+   long timeout;
+
+   timeout = MAX_SCHEDULE_TIMEOUT;
+   if (nonblock)
+   timeout = 0;
 
+   timeout = i915_request_wait(rq,
+   I915_WAIT_INTERRUPTIBLE,
+   timeout);
i915_request_put(rq);
+
+   if (timeout < 0) {
+   err = nonblock ? -EWOULDBLOCK : timeout;
+   goto err_exit;
+   }
}
 
eb->engine = ce->engine;
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 2/2] drm/i915/selftests: Check that GPR are cleared for new contexts

2019-10-10 Thread Chris Wilson
Quoting Chris Wilson (2019-10-10 14:15:21)
> +static int __live_gpr_clear(struct i915_gem_context *fixme,
> +   struct intel_engine_cs *engine,
> +   struct i915_vma *scratch)
> +{
> +   struct intel_context *ce;
> +   struct i915_request *rq;
> +   u32 *cs;
> +   int err;
> +   int n;
> +
> +   if (INTEL_GEN(engine->i915) < 8 && engine->class != RENDER_CLASS)
> +   return 0; /* GPR only on rcs0 for gen8 */

Nice comment, shame about the code.

I'll wait to make sure that this does indeed fail on Broadwell.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 2/2] drm/i915/selftests: Check that GPR are cleared for new contexts

2019-10-10 Thread Chris Wilson
We want the general purpose registers to be clear in all new contexts so
that we can be confident that no information is leaked from one to the
next.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 185 ++---
 1 file changed, 166 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index def1e64aaf1c..0f63016f91bc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -19,6 +19,9 @@
 #include "gem/selftests/igt_gem_utils.h"
 #include "gem/selftests/mock_context.h"
 
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+
 static struct i915_vma *create_scratch(struct intel_gt *gt)
 {
struct drm_i915_gem_object *obj;
@@ -2107,16 +2110,14 @@ static int preserved_virtual_engine(struct 
drm_i915_private *i915,
struct intel_engine_cs **siblings,
unsigned int nsibling)
 {
-#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
-
struct i915_request *last = NULL;
struct i915_gem_context *ctx;
struct intel_context *ve;
struct i915_vma *scratch;
struct igt_live_test t;
-   const int num_gpr = 16 * 2; /* each GPR is 2 dwords */
unsigned int n;
int err = 0;
+   u32 *cs;
 
ctx = kernel_context(i915);
if (!ctx)
@@ -2142,10 +2143,9 @@ static int preserved_virtual_engine(struct 
drm_i915_private *i915,
if (err)
goto out_unpin;
 
-   for (n = 0; n < num_gpr; n++) {
+   for (n = 0; n < NUM_GPR_DW; n++) {
struct intel_engine_cs *engine = siblings[n % nsibling];
struct i915_request *rq;
-   u32 *cs;
 
rq = i915_request_create(ve);
if (IS_ERR(rq)) {
@@ -2169,7 +2169,7 @@ static int preserved_virtual_engine(struct 
drm_i915_private *i915,
*cs++ = 0;
 
*cs++ = MI_LOAD_REGISTER_IMM(1);
-   *cs++ = CS_GPR(engine, (n + 1) % num_gpr);
+   *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
*cs++ = n + 1;
 
*cs++ = MI_NOOP;
@@ -2182,21 +2182,26 @@ static int preserved_virtual_engine(struct 
drm_i915_private *i915,
 
if (i915_request_wait(last, 0, HZ / 5) < 0) {
err = -ETIME;
-   } else {
-   u32 *map = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+   goto out_end;
+   }
 
-   for (n = 0; n < num_gpr; n++) {
-   if (map[n] != n) {
-   pr_err("Incorrect value[%d] found for 
GPR[%d]\n",
-  map[n], n);
-   err = -EINVAL;
-   break;
-   }
-   }
+   cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   goto out_end;
+   }
 
-   i915_gem_object_unpin_map(scratch->obj);
+   for (n = 0; n < NUM_GPR_DW; n++) {
+   if (cs[n] != n) {
+   pr_err("Incorrect value[%d] found for GPR[%d]\n",
+  cs[n], n);
+   err = -EINVAL;
+   break;
+   }
}
 
+   i915_gem_object_unpin_map(scratch->obj);
+
 out_end:
if (igt_live_test_end())
err = -EIO;
@@ -2210,8 +2215,6 @@ static int preserved_virtual_engine(struct 
drm_i915_private *i915,
 out_close:
kernel_context_close(ctx);
return err;
-
-#undef CS_GPR
 }
 
 static int live_virtual_preserved(void *arg)
@@ -2736,11 +2739,155 @@ static int live_lrc_state(void *arg)
return err;
 }
 
+static int gpr_make_dirty(struct intel_engine_cs *engine)
+{
+   struct i915_request *rq;
+   u32 *cs;
+   int n;
+
+   rq = i915_request_create(engine->kernel_context);
+   if (IS_ERR(rq))
+   return PTR_ERR(rq);
+
+   cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
+   if (IS_ERR(cs)) {
+   i915_request_add(rq);
+   return PTR_ERR(cs);
+   }
+
+   *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
+   for (n = 0; n < NUM_GPR_DW; n++) {
+   *cs++ = CS_GPR(engine, n);
+   *cs++ = STACK_MAGIC;
+   }
+   *cs++ = MI_NOOP;
+
+   intel_ring_advance(rq, cs);
+   i915_request_add(rq);
+
+   return 0;
+}
+
+static int __live_gpr_clear(struct i915_gem_context *fixme,
+   struct intel_engine_cs *engine,
+   struct i915_vma *scratch)
+{
+   struct intel_context *ce;
+

[Intel-gfx] [PATCH 1/2] drm/i915/selftests: Check known register values within the context

2019-10-10 Thread Chris Wilson
Check the logical ring context by asserting that the registers hold
expected state during execution. (It's a bit chicken-and-egg for how
could we manage to execute our request if the registers were not being
updated. Still, it's nice to verify that the HW is working as expected.)

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 138 +
 1 file changed, 138 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index a691e429ca01..def1e64aaf1c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -2599,10 +2599,148 @@ static int live_lrc_layout(void *arg)
return err;
 }
 
+static int __live_lrc_state(struct i915_gem_context *fixme,
+   struct intel_engine_cs *engine,
+   struct i915_vma *scratch)
+{
+   struct intel_context *ce;
+   struct i915_request *rq;
+   enum {
+   RING_START_IDX = 0,
+   RING_HEAD_IDX,
+   RING_TAIL_IDX,
+   MAX_IDX
+   };
+   u32 expected[MAX_IDX];
+   u32 *cs;
+   int err;
+   int n;
+
+   ce = intel_context_create(fixme, engine);
+   if (IS_ERR(ce))
+   return PTR_ERR(ce);
+
+   err = intel_context_pin(ce);
+   if (err)
+   goto err_put;
+
+   rq = i915_request_create(ce);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto err_unpin;
+   }
+
+   expected[RING_HEAD_IDX] = ce->ring->emit;
+
+   cs = intel_ring_begin(rq, 4 * MAX_IDX);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   i915_request_add(rq);
+   goto err_unpin;
+   }
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
+   *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
+   *cs++ = 0;
+
+   expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = i915_mmio_reg_offset(RING_HEAD(engine->mmio_base));
+   *cs++ = i915_ggtt_offset(scratch) + RING_HEAD_IDX * sizeof(u32);
+   *cs++ = 0;
+
+   expected[RING_HEAD_IDX] += 6 * sizeof(u32);
+   if (engine->class == RENDER_CLASS)
+   expected[RING_HEAD_IDX] += 2 * sizeof(u32);
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
+   *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
+   *cs++ = 0;
+
+   i915_request_get(rq);
+   i915_request_add(rq);
+
+   intel_engine_flush_submission(engine);
+   expected[RING_TAIL_IDX] = ce->ring->tail;
+
+   if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+   err = -ETIME;
+   goto err_rq;
+   }
+
+   cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   goto err_rq;
+   }
+
+   for (n = 0; n < MAX_IDX; n++) {
+   if (cs[n] != expected[n]) {
+   pr_err("%s: Stored register[%d] value[0x%x] did not 
match expected[0x%x]\n",
+  engine->name, n, cs[n], expected[n]);
+   err = -EINVAL;
+   break;
+   }
+   }
+
+   i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+   i915_request_put(rq);
+err_unpin:
+   intel_context_unpin(ce);
+err_put:
+   intel_context_put(ce);
+   return err;
+}
+
+static int live_lrc_state(void *arg)
+{
+   struct intel_gt *gt = arg;
+   struct intel_engine_cs *engine;
+   struct i915_gem_context *fixme;
+   struct i915_vma *scratch;
+   enum intel_engine_id id;
+   int err = 0;
+
+   /*
+* Check the live register state matches what we expect for this
+* intel_context.
+*/
+
+   fixme = kernel_context(gt->i915);
+   if (!fixme)
+   return -ENOMEM;
+
+   scratch = create_scratch(gt);
+   if (IS_ERR(scratch)) {
+   err = PTR_ERR(scratch);
+   goto out_close;
+   }
+
+   for_each_engine(engine, gt->i915, id) {
+   err = __live_lrc_state(fixme, engine, scratch);
+   if (err)
+   break;
+   }
+
+   if (igt_flush_test(gt->i915))
+   err = -EIO;
+
+   i915_vma_unpin_and_release(, 0);
+out_close:
+   kernel_context_close(fixme);
+   return err;
+}
+
 int intel_lrc_live_selftests(struct drm_i915_private *i915)
 {
static const struct i915_subtest tests[] = {
SUBTEST(live_lrc_layout),
+   SUBTEST(live_lrc_state),
};
 
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
-

Re: [Intel-gfx] [PATCH] drm/i915/selftests: Check that registers are preserved between virtual engines

2019-10-10 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-10 13:31:04)
> 
> On 10/10/2019 12:02, Chris Wilson wrote:
> > Make sure that we copy across the registers from one engine to the next,
> > as we hop around a virtual engine.
> > 
> > Signed-off-by: Chris Wilson 
> > Cc: Tvrtko Ursulin 
> > ---
> > Skip the test on gen8 as the context image is devoid of CS_GPR.
> > ---
> >   drivers/gpu/drm/i915/gt/selftest_lrc.c | 180 +
> >   1 file changed, 180 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
> > b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > index 198cf2f754f4..9ee1fdd16aff 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > @@ -1952,6 +1952,33 @@ static int live_virtual_engine(void *arg)
> >   return 0;
> >   }
> >   
> > +static struct i915_vma *create_scratch(struct intel_gt *gt)
> > +{
> > + struct drm_i915_gem_object *obj;
> > + struct i915_vma *vma;
> > + int err;
> > +
> > + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
> > + if (IS_ERR(obj))
> > + return ERR_CAST(obj);
> > +
> > + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
> > +
> > + vma = i915_vma_instance(obj, >ggtt->vm, NULL);
> > + if (IS_ERR(vma)) {
> > + i915_gem_object_put(obj);
> > + return vma;
> > + }
> > +
> > + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
> > + if (err) {
> > + i915_gem_object_put(obj);
> > + return ERR_PTR(err);
> > + }
> > +
> > + return vma;
> > +}
> > +
> >   static int mask_virtual_engine(struct drm_i915_private *i915,
> >  struct intel_engine_cs **siblings,
> >  unsigned int nsibling)
> > @@ -2076,6 +2103,158 @@ static int live_virtual_mask(void *arg)
> >   return 0;
> >   }
> >   
> > +static int preserved_virtual_engine(struct drm_i915_private *i915,
> > + struct intel_engine_cs **siblings,
> > + unsigned int nsibling)
> > +{
> > +#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
> > +
> > + struct i915_request *last = NULL;
> > + struct i915_gem_context *ctx;
> > + struct intel_context *ve;
> > + struct i915_vma *scratch;
> > + struct igt_live_test t;
> > + const int num_gpr = 16 * 2; /* each GPR is 2 dwords */
> > + unsigned int n;
> > + int err = 0;
> > +
> > + ctx = kernel_context(i915);
> > + if (!ctx)
> > + return -ENOMEM;
> > +
> > + scratch = create_scratch(siblings[0]->gt);
> > + if (IS_ERR(scratch)) {
> > + err = PTR_ERR(scratch);
> > + goto out_close;
> > + }
> > +
> > + ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
> > + if (IS_ERR(ve)) {
> > + err = PTR_ERR(ve);
> > + goto out_scratch;
> > + }
> > +
> > + err = intel_context_pin(ve);
> > + if (err)
> > + goto out_put;
> > +
> > + err = igt_live_test_begin(, i915, __func__, ve->engine->name);
> > + if (err)
> > + goto out_unpin;
> > +
> > + for (n = 0; n < num_gpr; n++) {
> > + struct intel_engine_cs *engine = siblings[n % nsibling];
> > + struct i915_request *rq;
> > + u32 *cs;
> > +
> > + rq = i915_request_create(ve);
> > + if (IS_ERR(rq)) {
> > + err = PTR_ERR(rq);
> > + goto out_end;
> > + }
> > +
> > + i915_request_put(last);
> > + last = i915_request_get(rq);
> > +
> > + cs = intel_ring_begin(rq, 8);
> > + if (IS_ERR(cs)) {
> > + i915_request_add(rq);
> > + err = PTR_ERR(cs);
> > + goto out_end;
> > + }
> > +
> > + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
> > + *cs++ = CS_GPR(engine, n);
> > + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
> > + *cs++ = 0;
> > +
> > + *cs++ = MI_LOAD_REGISTER_IMM(1);
> > + *cs++ = CS_GPR(engine, (n + 

[Intel-gfx] [PATCH 2/2] drm/i915/selftests: Check known register values within the context

2019-10-10 Thread Chris Wilson
Check the logical ring context by asserting that the registers hold
expected state during execution. (It's a bit chicken-and-egg for how
could we manage to execute our request if the registers were not being
updated. Still, it's nice to verify that the HW is working as expected.)

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 138 +
 1 file changed, 138 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index a691e429ca01..def1e64aaf1c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -2599,10 +2599,148 @@ static int live_lrc_layout(void *arg)
return err;
 }
 
+static int __live_lrc_state(struct i915_gem_context *fixme,
+   struct intel_engine_cs *engine,
+   struct i915_vma *scratch)
+{
+   struct intel_context *ce;
+   struct i915_request *rq;
+   enum {
+   RING_START_IDX = 0,
+   RING_HEAD_IDX,
+   RING_TAIL_IDX,
+   MAX_IDX
+   };
+   u32 expected[MAX_IDX];
+   u32 *cs;
+   int err;
+   int n;
+
+   ce = intel_context_create(fixme, engine);
+   if (IS_ERR(ce))
+   return PTR_ERR(ce);
+
+   err = intel_context_pin(ce);
+   if (err)
+   goto err_put;
+
+   rq = i915_request_create(ce);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto err_unpin;
+   }
+
+   expected[RING_HEAD_IDX] = ce->ring->emit;
+
+   cs = intel_ring_begin(rq, 4 * MAX_IDX);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   i915_request_add(rq);
+   goto err_unpin;
+   }
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
+   *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
+   *cs++ = 0;
+
+   expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = i915_mmio_reg_offset(RING_HEAD(engine->mmio_base));
+   *cs++ = i915_ggtt_offset(scratch) + RING_HEAD_IDX * sizeof(u32);
+   *cs++ = 0;
+
+   expected[RING_HEAD_IDX] += 6 * sizeof(u32);
+   if (engine->class == RENDER_CLASS)
+   expected[RING_HEAD_IDX] += 2 * sizeof(u32);
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
+   *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
+   *cs++ = 0;
+
+   i915_request_get(rq);
+   i915_request_add(rq);
+
+   intel_engine_flush_submission(engine);
+   expected[RING_TAIL_IDX] = ce->ring->tail;
+
+   if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+   err = -ETIME;
+   goto err_rq;
+   }
+
+   cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+   if (IS_ERR(cs)) {
+   err = PTR_ERR(cs);
+   goto err_rq;
+   }
+
+   for (n = 0; n < MAX_IDX; n++) {
+   if (cs[n] != expected[n]) {
+   pr_err("%s: Stored register[%d] value[0x%x] did not 
match expected[0x%x]\n",
+  engine->name, n, cs[n], expected[n]);
+   err = -EINVAL;
+   break;
+   }
+   }
+
+   i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+   i915_request_put(rq);
+err_unpin:
+   intel_context_unpin(ce);
+err_put:
+   intel_context_put(ce);
+   return err;
+}
+
+static int live_lrc_state(void *arg)
+{
+   struct intel_gt *gt = arg;
+   struct intel_engine_cs *engine;
+   struct i915_gem_context *fixme;
+   struct i915_vma *scratch;
+   enum intel_engine_id id;
+   int err = 0;
+
+   /*
+* Check the live register state matches what we expect for this
+* intel_context.
+*/
+
+   fixme = kernel_context(gt->i915);
+   if (!fixme)
+   return -ENOMEM;
+
+   scratch = create_scratch(gt);
+   if (IS_ERR(scratch)) {
+   err = PTR_ERR(scratch);
+   goto out_close;
+   }
+
+   for_each_engine(engine, gt->i915, id) {
+   err = __live_lrc_state(fixme, engine, scratch);
+   if (err)
+   break;
+   }
+
+   if (igt_flush_test(gt->i915))
+   err = -EIO;
+
+   i915_vma_unpin_and_release(, 0);
+out_close:
+   kernel_context_close(fixme);
+   return err;
+}
+
 int intel_lrc_live_selftests(struct drm_i915_private *i915)
 {
static const struct i915_subtest tests[] = {
SUBTEST(live_lrc_layout),
+   SUBTEST(live_lrc_state),
};
 
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
-

[Intel-gfx] [PATCH 1/2] drm/i915/selftests: Check that registers are preserved between virtual engines

2019-10-10 Thread Chris Wilson
Make sure that we copy across the registers from one engine to the next,
as we hop around a virtual engine.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 180 +
 1 file changed, 180 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 198cf2f754f4..a691e429ca01 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -19,6 +19,33 @@
 #include "gem/selftests/igt_gem_utils.h"
 #include "gem/selftests/mock_context.h"
 
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+   struct drm_i915_gem_object *obj;
+   struct i915_vma *vma;
+   int err;
+
+   obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+   if (IS_ERR(obj))
+   return ERR_CAST(obj);
+
+   i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+   vma = i915_vma_instance(obj, >ggtt->vm, NULL);
+   if (IS_ERR(vma)) {
+   i915_gem_object_put(obj);
+   return vma;
+   }
+
+   err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+   if (err) {
+   i915_gem_object_put(obj);
+   return ERR_PTR(err);
+   }
+
+   return vma;
+}
+
 static int live_sanitycheck(void *arg)
 {
struct drm_i915_private *i915 = arg;
@@ -2076,6 +2103,158 @@ static int live_virtual_mask(void *arg)
return 0;
 }
 
+static int preserved_virtual_engine(struct drm_i915_private *i915,
+   struct intel_engine_cs **siblings,
+   unsigned int nsibling)
+{
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+
+   struct i915_request *last = NULL;
+   struct i915_gem_context *ctx;
+   struct intel_context *ve;
+   struct i915_vma *scratch;
+   struct igt_live_test t;
+   const int num_gpr = 16 * 2; /* each GPR is 2 dwords */
+   unsigned int n;
+   int err = 0;
+
+   ctx = kernel_context(i915);
+   if (!ctx)
+   return -ENOMEM;
+
+   scratch = create_scratch(siblings[0]->gt);
+   if (IS_ERR(scratch)) {
+   err = PTR_ERR(scratch);
+   goto out_close;
+   }
+
+   ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+   if (IS_ERR(ve)) {
+   err = PTR_ERR(ve);
+   goto out_scratch;
+   }
+
+   err = intel_context_pin(ve);
+   if (err)
+   goto out_put;
+
+   err = igt_live_test_begin(, i915, __func__, ve->engine->name);
+   if (err)
+   goto out_unpin;
+
+   for (n = 0; n < num_gpr; n++) {
+   struct intel_engine_cs *engine = siblings[n % nsibling];
+   struct i915_request *rq;
+   u32 *cs;
+
+   rq = i915_request_create(ve);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto out_end;
+   }
+
+   i915_request_put(last);
+   last = i915_request_get(rq);
+
+   cs = intel_ring_begin(rq, 8);
+   if (IS_ERR(cs)) {
+   i915_request_add(rq);
+   err = PTR_ERR(cs);
+   goto out_end;
+   }
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = CS_GPR(engine, n);
+   *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+   *cs++ = 0;
+
+   *cs++ = MI_LOAD_REGISTER_IMM(1);
+   *cs++ = CS_GPR(engine, (n + 1) % num_gpr);
+   *cs++ = n + 1;
+
+   *cs++ = MI_NOOP;
+   intel_ring_advance(rq, cs);
+
+   /* Restrict this request to run on a particular engine */
+   rq->execution_mask = engine->mask;
+   i915_request_add(rq);
+   }
+
+   if (i915_request_wait(last, 0, HZ / 5) < 0) {
+   err = -ETIME;
+   } else {
+   u32 *map = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+
+   for (n = 0; n < num_gpr; n++) {
+   if (map[n] != n) {
+   pr_err("Incorrect value[%d] found for 
GPR[%d]\n",
+  map[n], n);
+   err = -EINVAL;
+   break;
+   }
+   }
+
+   i915_gem_object_unpin_map(scratch->obj);
+   }
+
+out_end:
+   if (igt_live_test_end())
+   err = -EIO;
+   i915_request_put(last);
+out_unpin:
+   intel_context_unpin(ve);
+out_put:
+   intel_context_put(ve);
+out_scratch:
+   i915_vma_unpin_and_release(, 0);
+out_close:
+   kernel_context_close(ctx);
+   return err;
+
+#undef CS_GPR
+}
+
+static int live_vir

[Intel-gfx] [PATCH] drm/i915/selftests: Check that registers are preserved between virtual engines

2019-10-10 Thread Chris Wilson
Make sure that we copy across the registers from one engine to the next,
as we hop around a virtual engine.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
Skip the test on gen8 as the context image is devoid of CS_GPR.
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 180 +
 1 file changed, 180 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 198cf2f754f4..9ee1fdd16aff 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1952,6 +1952,33 @@ static int live_virtual_engine(void *arg)
return 0;
 }
 
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+   struct drm_i915_gem_object *obj;
+   struct i915_vma *vma;
+   int err;
+
+   obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+   if (IS_ERR(obj))
+   return ERR_CAST(obj);
+
+   i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+   vma = i915_vma_instance(obj, >ggtt->vm, NULL);
+   if (IS_ERR(vma)) {
+   i915_gem_object_put(obj);
+   return vma;
+   }
+
+   err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+   if (err) {
+   i915_gem_object_put(obj);
+   return ERR_PTR(err);
+   }
+
+   return vma;
+}
+
 static int mask_virtual_engine(struct drm_i915_private *i915,
   struct intel_engine_cs **siblings,
   unsigned int nsibling)
@@ -2076,6 +2103,158 @@ static int live_virtual_mask(void *arg)
return 0;
 }
 
+static int preserved_virtual_engine(struct drm_i915_private *i915,
+   struct intel_engine_cs **siblings,
+   unsigned int nsibling)
+{
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+
+   struct i915_request *last = NULL;
+   struct i915_gem_context *ctx;
+   struct intel_context *ve;
+   struct i915_vma *scratch;
+   struct igt_live_test t;
+   const int num_gpr = 16 * 2; /* each GPR is 2 dwords */
+   unsigned int n;
+   int err = 0;
+
+   ctx = kernel_context(i915);
+   if (!ctx)
+   return -ENOMEM;
+
+   scratch = create_scratch(siblings[0]->gt);
+   if (IS_ERR(scratch)) {
+   err = PTR_ERR(scratch);
+   goto out_close;
+   }
+
+   ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+   if (IS_ERR(ve)) {
+   err = PTR_ERR(ve);
+   goto out_scratch;
+   }
+
+   err = intel_context_pin(ve);
+   if (err)
+   goto out_put;
+
+   err = igt_live_test_begin(, i915, __func__, ve->engine->name);
+   if (err)
+   goto out_unpin;
+
+   for (n = 0; n < num_gpr; n++) {
+   struct intel_engine_cs *engine = siblings[n % nsibling];
+   struct i915_request *rq;
+   u32 *cs;
+
+   rq = i915_request_create(ve);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto out_end;
+   }
+
+   i915_request_put(last);
+   last = i915_request_get(rq);
+
+   cs = intel_ring_begin(rq, 8);
+   if (IS_ERR(cs)) {
+   i915_request_add(rq);
+   err = PTR_ERR(cs);
+   goto out_end;
+   }
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = CS_GPR(engine, n);
+   *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+   *cs++ = 0;
+
+   *cs++ = MI_LOAD_REGISTER_IMM(1);
+   *cs++ = CS_GPR(engine, (n + 1) % num_gpr);
+   *cs++ = n + 1;
+
+   *cs++ = MI_NOOP;
+   intel_ring_advance(rq, cs);
+
+   /* Restrict this request to run on a particular engine */
+   rq->execution_mask = engine->mask;
+   i915_request_add(rq);
+   }
+
+   if (i915_request_wait(last, 0, HZ / 5) < 0) {
+   err = -ETIME;
+   } else {
+   u32 *map = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+
+   for (n = 0; n < num_gpr; n++) {
+   if (map[n] != n) {
+   pr_err("Incorrect value[%d] found for 
GPR[%d]\n",
+  map[n], n);
+   err = -EINVAL;
+   break;
+   }
+   }
+
+   i915_gem_object_unpin_map(scratch->obj);
+   }
+
+out_end:
+   if (igt_live_test_end())
+   err = -EIO;
+   i915_request_put(last);
+out_unpin:
+   intel_context_unpin(ve);
+out_put:
+   intel_context_put(ve);
+out_scratch:

Re: [Intel-gfx] [PATCH] drm/i915/selftests: Check that registers are preserved between virtual engines

2019-10-10 Thread Chris Wilson
Quoting Chris Wilson (2019-10-10 11:36:57)
> Make sure that we copy across the registers from one engine to the next,
> as we hop around a virtual engine.

Looking at Broadwell's HW context image, there are no GPR registers for
xcs. Weird.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/selftests: Check that registers are preserved between virtual engines

2019-10-10 Thread Chris Wilson
Make sure that we copy across the registers from one engine to the next,
as we hop around a virtual engine.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 176 +
 1 file changed, 176 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 198cf2f754f4..ebb1e9b4e71d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1952,6 +1952,181 @@ static int live_virtual_engine(void *arg)
return 0;
 }
 
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+   struct drm_i915_gem_object *obj;
+   struct i915_vma *vma;
+   int err;
+
+   obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+   if (IS_ERR(obj))
+   return ERR_CAST(obj);
+
+   i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+   vma = i915_vma_instance(obj, >ggtt->vm, NULL);
+   if (IS_ERR(vma)) {
+   i915_gem_object_put(obj);
+   return vma;
+   }
+
+   err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+   if (err) {
+   i915_gem_object_put(obj);
+   return ERR_PTR(err);
+   }
+
+   return vma;
+}
+
+static int preserved_virtual_engine(struct drm_i915_private *i915,
+   struct intel_engine_cs **siblings,
+   unsigned int nsibling)
+{
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+
+   struct i915_request *last = NULL;
+   struct i915_gem_context *ctx;
+   struct intel_context *ve;
+   struct i915_vma *scratch;
+   struct igt_live_test t;
+   const int num_gpr = 16 * 2; /* each GPR is 2 dwords */
+   unsigned int n;
+   int err = 0;
+
+   ctx = kernel_context(i915);
+   if (!ctx)
+   return -ENOMEM;
+
+   scratch = create_scratch(siblings[0]->gt);
+   if (IS_ERR(scratch)) {
+   err = PTR_ERR(scratch);
+   goto out_close;
+   }
+
+   ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+   if (IS_ERR(ve)) {
+   err = PTR_ERR(ve);
+   goto out_scratch;
+   }
+
+   err = intel_context_pin(ve);
+   if (err)
+   goto out_put;
+
+   err = igt_live_test_begin(, i915, __func__, ve->engine->name);
+   if (err)
+   goto out_unpin;
+
+   for (n = 0; n < num_gpr; n++) {
+   struct intel_engine_cs *engine = siblings[n % nsibling];
+   struct i915_request *rq;
+   u32 *cs;
+
+   rq = i915_request_create(ve);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto out_end;
+   }
+
+   i915_request_put(last);
+   last = i915_request_get(rq);
+
+   cs = intel_ring_begin(rq, 8);
+   if (IS_ERR(cs)) {
+   i915_request_add(rq);
+   err = PTR_ERR(cs);
+   goto out_end;
+   }
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+   *cs++ = CS_GPR(engine, n);
+   *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+   *cs++ = 0;
+
+   *cs++ = MI_LOAD_REGISTER_IMM(1);
+   *cs++ = CS_GPR(engine, (n + 1) % num_gpr);
+   *cs++ = n + 1;
+
+   *cs++ = MI_NOOP;
+   intel_ring_advance(rq, cs);
+
+   /* Restrict this request to run on a particular engine */
+   rq->execution_mask = engine->mask;
+   i915_request_add(rq);
+   }
+
+   if (i915_request_wait(last, 0, HZ / 5) < 0) {
+   err = -ETIME;
+   } else {
+   u32 *map = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+
+   for (n = 0; n < num_gpr; n++) {
+   if (map[n] != n) {
+   pr_err("Incorrect value[%d] found for 
GPR[%d]\n",
+  map[n], n);
+   err = -EINVAL;
+   break;
+   }
+   }
+
+   i915_gem_object_unpin_map(scratch->obj);
+   }
+
+out_end:
+   if (igt_live_test_end())
+   err = -EIO;
+   i915_request_put(last);
+out_unpin:
+   intel_context_unpin(ve);
+out_put:
+   intel_context_put(ve);
+out_scratch:
+   i915_vma_unpin_and_release(, 0);
+out_close:
+   kernel_context_close(ctx);
+   return err;
+
+#undef CS_GPR
+}
+
+static int live_virtual_preserved(void *arg)
+{
+   struct drm_i915_private *i915 = arg;
+   struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+   struct intel_gt *gt = >gt;
+   unsigned int class, ins

Re: [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/tgl: the BCS engine supports relative MMIO

2019-10-10 Thread Chris Wilson
Quoting Patchwork (2019-10-10 03:08:10)
> == Series Details ==
> 
> Series: series starting with [1/2] drm/i915/tgl: the BCS engine supports 
> relative MMIO
> URL   : https://patchwork.freedesktop.org/series/67809/
> State : success
> 
> == Summary ==
> 
> CI Bug Log - changes from CI_DRM_7046 -> Patchwork_14739
> 
> 
> Summary
> ---
> 
>   **SUCCESS**
> 
>   No regressions found.
> 
>   External URL: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14739/index.html

That's enough (boots and live_lrc found no inconsistencies) convincing,
pushed.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 2/2] drm/i915/tgl: simplify the lrc register list for !RCS

2019-10-10 Thread Chris Wilson
Quoting Daniele Ceraolo Spurio (2019-10-10 00:04:24)
> There are small differences between the blitter and the video engines in
> the xcs context image (e.g. registers 0x200 and 0x204 only exist on the
> blitter). Since we never explicitly set a value for those register and
> given that we don't need to update the offsets in the lrc image when we
> change engine within the class for virtual engine because the HW can
> handle that, instead of having a separate define for the BCS we can
> just restrict the programming to the part we're interested in, which is
> common across the engines.

Yeah, my thinking was to be as complete as possible so that if we needed
to apply register updates, we could. It was also a fascinating insight
into what was stored, I was planning on using it for doing
isolation testing (albeit that's a bit chicken-and-egg).

> Bspec: 45584
> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Chris Wilson 
> Cc: Mika Kuoppala 
> Cc: Stuart Summers 

No qualms about restricting ourselves to the bare essentials on the
basis that the context image is meant to be relative-addressed. It did
not improve stability of tgl-gem however.
Reviewed-by: Chris Wilson 
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 1/2] drm/i915/tgl: the BCS engine supports relative MMIO

2019-10-10 Thread Chris Wilson
Quoting Daniele Ceraolo Spurio (2019-10-10 00:04:23)
> The specs don't mention any specific HW limitation on the blitter and
> manual inspection shows that the HW does set the relative MMIO bit in
> the LRI of the blitter context image, so we can remove our limitations.

I concur, the HW itself sets the bit, so it can't be too harmful...

> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Chris Wilson 
> Cc: John Harrison 
> Cc: Mika Kuoppala 
Reviewed-by: Chris Wilson 
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/execlists: Mark up expected state during reset

2019-10-10 Thread Chris Wilson
Move the BUG_ON around slightly and add some explanations for each to
try and capture the expected state more carefully. We want to compare
the expected active state of our bookkeeping as compared to the tracked
HW state.

References: https://bugs.freedesktop.org/show_bug.cgi?id=111937
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7ea58335f04c..7c0d3c343520 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2777,8 +2777,10 @@ static void __execlists_reset(struct intel_engine_cs 
*engine, bool stalled)
if (!rq)
goto unwind;
 
+   /* We still have requests in-flight; the engine should be active */
+   GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
ce = rq->hw_context;
-   GEM_BUG_ON(i915_active_is_idle(>active));
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
/* Proclaim we have exclusive access to the context image! */
@@ -2786,10 +2788,13 @@ static void __execlists_reset(struct intel_engine_cs 
*engine, bool stalled)
 
rq = active_request(rq);
if (!rq) {
+   /* Idle context; tidy up the ring so we can restart afresh */
ce->ring->head = ce->ring->tail;
goto out_replay;
}
 
+   /* Context has requests still in-flight; it should not be idle! */
+   GEM_BUG_ON(i915_active_is_idle(>active));
ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
 
/*
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH i-g-t 1/2] i915_drm.h sync

2019-10-10 Thread Chris Wilson
Update to commit fef476f3ab47527a00818ddaf4b46b8c0936 (not upstream!)
Author: Chris Wilson 
Date:   Mon Aug 5 22:55:44 2019 +0100

drm/i915: Cancel non-persistent contexts on close

for I915_CONTEXT_PARAM_PERSISTENCE
---
 include/drm-uapi/i915_drm.h | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index 761517f15..7badfa0b1 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -521,6 +521,7 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_PRIORITY  (1ul << 1)
 #define   I915_SCHEDULER_CAP_PREEMPTION(1ul << 2)
 #define   I915_SCHEDULER_CAP_SEMAPHORES(1ul << 3)
+#define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
 
 #define I915_PARAM_HUC_STATUS   42
 
@@ -1564,6 +1565,21 @@ struct drm_i915_gem_context_param {
  *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES 0xa
+
+/*
+ * I915_CONTEXT_PARAM_PERSISTENCE:
+ *
+ * Allow the context and active rendering to survive the process until
+ * completion. Persistence allows fire-and-forget clients to queue up a
+ * bunch of work, hand the output over to a display server and then quit.
+ * If the context is not marked as persistent, upon closing (either via
+ * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
+ * or process termination), the context and any outstanding requests will be
+ * cancelled (and exported fences for cancelled requests marked as -EIO).
+ *
+ * By default, new contexts allow persistence.
+ */
+#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
 /* Must be kept compact -- no holes and well documented */
 
__u64 value;
@@ -2032,8 +2048,10 @@ struct drm_i915_query {
  *   (data[X / 8] >> (X % 8)) & 1
  *
  * - the subslice mask for each slice with one bit per subslice telling
- *   whether a subslice is available. The availability of subslice Y in slice
- *   X can be queried with the following formula :
+ *   whether a subslice is available. Gen12 has dual-subslices, which are
+ *   similar to two gen11 subslices. For gen12, this array represents dual-
+ *   subslices. The availability of subslice Y in slice X can be queried
+ *   with the following formula :
  *
  *   (data[subslice_offset +
  * X * subslice_stride +
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH i-g-t 2/2] Add i915/gem_ctx_persistence

2019-10-10 Thread Chris Wilson
Sanity test existing persistence and new exciting non-persistent context
behaviour.

Signed-off-by: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Michał Winiarski 
Cc: Jon Bloomfield 
Cc: Tvrtko Ursulin 
Cc: Andi Shyti 
---
 lib/i915/gem_context.c   |  37 +++
 lib/i915/gem_context.h   |   8 +
 lib/igt_dummyload.c  |   3 +-
 lib/ioctl_wrappers.c |   1 +
 tests/Makefile.sources   |   3 +
 tests/i915/gem_ctx_persistence.c | 407 +++
 tests/meson.build|   1 +
 7 files changed, 459 insertions(+), 1 deletion(-)
 create mode 100644 tests/i915/gem_ctx_persistence.c

diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
index 83c5df961..1fae5191f 100644
--- a/lib/i915/gem_context.c
+++ b/lib/i915/gem_context.c
@@ -272,6 +272,43 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int 
prio)
igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
 }
 
+/**
+ * __gem_context_set_persistence:
+ * @i915: open i915 drm file descriptor
+ * @ctx: i915 context id
+ * @state: desired persistence
+ *
+ * Declare whether this context is allowed to persist after closing until
+ * its requests are complete (persistent=true) or if it should be
+ * immediately reaped on closing and its requests cancelled
+ * (persistent=false).
+ *
+ * Returns: An integer equal to zero for success and negative for failure
+ */
+int __gem_context_set_persistence(int i915, uint32_t ctx, bool state)
+{
+   struct drm_i915_gem_context_param p = {
+   .ctx_id = ctx,
+   .param = I915_CONTEXT_PARAM_PERSISTENCE,
+   .value = state,
+   };
+
+   return __gem_context_set_param(i915, );
+}
+
+/**
+ * gem_context_set_persistence:
+ * @i915: open i915 drm file descriptor
+ * @ctx: i915 context id
+ * @state: desired persistence
+ *
+ * Like __gem_context_set_persistence(), except we assert on failure.
+ */
+void gem_context_set_persistence(int i915, uint32_t ctx, bool state)
+{
+   igt_assert_eq(__gem_context_set_persistence(i915, ctx, state), 0);
+}
+
 int
 __gem_context_clone(int i915,
uint32_t src, unsigned int share,
diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
index 8043c3401..c0d4c9615 100644
--- a/lib/i915/gem_context.h
+++ b/lib/i915/gem_context.h
@@ -24,6 +24,11 @@
 #ifndef GEM_CONTEXT_H
 #define GEM_CONTEXT_H
 
+#include 
+#include 
+
+struct drm_i915_gem_context_param;
+
 uint32_t gem_context_create(int fd);
 int __gem_context_create(int fd, uint32_t *ctx_id);
 void gem_context_destroy(int fd, uint32_t ctx_id);
@@ -58,6 +63,9 @@ int __gem_context_get_param(int fd, struct 
drm_i915_gem_context_param *p);
 int __gem_context_set_priority(int fd, uint32_t ctx, int prio);
 void gem_context_set_priority(int fd, uint32_t ctx, int prio);
 
+int __gem_context_set_persistence(int i915, uint32_t ctx, bool state);
+void gem_context_set_persistence(int i915, uint32_t ctx, bool state);
+
 bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine);
 
 #endif /* GEM_CONTEXT_H */
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
index 65b5cc927..6060878dd 100644
--- a/lib/igt_dummyload.c
+++ b/lib/igt_dummyload.c
@@ -450,7 +450,8 @@ void igt_spin_free(int fd, igt_spin_t *spin)
gem_close(fd, spin->poll_handle);
}
 
-   gem_close(fd, spin->handle);
+   if (spin->handle)
+   gem_close(fd, spin->handle);
 
if (spin->out_fence >= 0)
close(spin->out_fence);
diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c
index 280fdd624..628f8b830 100644
--- a/lib/ioctl_wrappers.c
+++ b/lib/ioctl_wrappers.c
@@ -445,6 +445,7 @@ int gem_wait(int fd, uint32_t handle, int64_t *timeout_ns)
ret = 0;
if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, ))
ret = -errno;
+   errno = 0;
 
if (timeout_ns)
*timeout_ns = wait.timeout_ns;
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 343be0500..093eb57f3 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -154,6 +154,9 @@ gem_ctx_isolation_SOURCES = i915/gem_ctx_isolation.c
 TESTS_progs += gem_ctx_param
 gem_ctx_param_SOURCES = i915/gem_ctx_param.c
 
+TESTS_progs += gem_ctx_persistence
+gem_ctx_persistence_SOURCES = i915/gem_ctx_persistence.c
+
 TESTS_progs += gem_ctx_shared
 gem_ctx_shared_SOURCES = i915/gem_ctx_shared.c
 
diff --git a/tests/i915/gem_ctx_persistence.c b/tests/i915/gem_ctx_persistence.c
new file mode 100644
index 0..854c146ec
--- /dev/null
+++ b/tests/i915/gem_ctx_persistence.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, 

[Intel-gfx] [PATCH 1/2] drm/i915/perf: store the associated engine of a stream

2019-10-10 Thread Chris Wilson
From: Lionel Landwerlin 

We'll use this information later to verify that a client trying to
reconfigure the stream does so on the right engine. For now, we want to
pull the knowledge of which engine we use into a central property.

Signed-off-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c   | 30 ++
 drivers/gpu/drm/i915/i915_perf_types.h |  5 +
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5a34cad7d824..1a5c6591b9bb 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -197,6 +197,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_user.h"
 #include "gt/intel_lrc_reg.h"
 
 #include "i915_drv.h"
@@ -347,6 +348,7 @@ static const struct i915_oa_format 
gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
  * @oa_format: An OA unit HW report format
  * @oa_periodic: Whether to enable periodic OA unit sampling
  * @oa_period_exponent: The OA unit sampling period is derived from this
+ * @engine: The engine (typically rcs0) being monitored by the OA unit
  *
  * As read_properties_unlocked() enumerates and validates the properties given
  * to open a stream of metrics the configuration is built up in the structure
@@ -363,6 +365,8 @@ struct perf_open_properties {
int oa_format;
bool oa_periodic;
int oa_period_exponent;
+
+   struct intel_engine_cs *engine;
 };
 
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
@@ -1205,7 +1209,7 @@ static struct intel_context *oa_pin_context(struct 
i915_perf_stream *stream)
int err;
 
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
-   if (ce->engine->class != RENDER_CLASS)
+   if (ce->engine != stream->engine) /* first match! */
continue;
 
/*
@@ -2127,7 +2131,13 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
int format_size;
int ret;
 
-   /* If the sysfs metrics/ directory wasn't registered for some
+   if (!props->engine) {
+   DRM_DEBUG("OA engine not specified\n");
+   return -EINVAL;
+   }
+
+   /*
+* If the sysfs metrics/ directory wasn't registered for some
 * reason then don't let userspace try their luck with config
 * IDs
 */
@@ -2146,7 +2156,8 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
return -ENODEV;
}
 
-   /* To avoid the complexity of having to accurately filter
+   /*
+* To avoid the complexity of having to accurately filter
 * counter reports and marshal to the appropriate client
 * we currently only allow exclusive access
 */
@@ -2160,6 +2171,9 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
return -EINVAL;
}
 
+   stream->engine = props->engine;
+   stream->gt = stream->engine->gt;
+
stream->sample_size = sizeof(struct drm_i915_perf_record_header);
 
format_size = perf->oa_formats[props->oa_format].size;
@@ -2711,7 +2725,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
}
 
stream->perf = perf;
-   stream->gt = >i915->gt;
stream->ctx = specific_ctx;
 
ret = i915_oa_stream_init(stream, param, props);
@@ -2796,6 +2809,15 @@ static int read_properties_unlocked(struct i915_perf 
*perf,
return -EINVAL;
}
 
+   /* At the moment we only support using i915-perf on the RCS. */
+   props->engine = intel_engine_lookup_user(perf->i915,
+I915_ENGINE_CLASS_RENDER,
+0);
+   if (!props->engine) {
+   DRM_DEBUG("No RENDER-capable engines\n");
+   return -EINVAL;
+   }
+
/* Considering that ID = 0 is reserved and assuming that we don't
 * (currently) expect any configurations to ever specify duplicate
 * values for a particular property ID then the last _PROP_MAX value is
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h 
b/drivers/gpu/drm/i915/i915_perf_types.h
index 2d17059d32ee..82cd3b295037 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -140,6 +140,11 @@ struct i915_perf_stream {
 */
intel_wakeref_t wakeref;
 
+   /**
+* @engine: Engine associated with this performance stream.
+*/
+   struct intel_engine_cs *engine;
+
/**
 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
 * properties given when opening a stream, representing the contents
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org

[Intel-gfx] [PATCH 2/2] drm/i915/perf: Store shortcut to intel_uncore

2019-10-10 Thread Chris Wilson
Now that we have the engine stored in i915_perf, we have a means of
accessing intel_gt should we require it. However, we are currently only
using the intel_gt to find the right intel_uncore, so replace our
i915_perf.gt pointer with the more useful i915_perf.uncore.

Signed-off-by: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c   | 48 +-
 drivers/gpu/drm/i915/i915_perf_types.h |  4 +--
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1a5c6591b9bb..77c3cef64548 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -419,14 +419,14 @@ static int get_oa_config(struct i915_perf *perf,
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
 
return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
 }
 
 static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
 
return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
@@ -656,7 +656,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
  size_t count,
  size_t *offset)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
int report_size = stream->oa_buffer.format_size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
@@ -866,7 +866,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
size_t count,
size_t *offset)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
u32 oastatus;
int ret;
 
@@ -945,7 +945,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream 
*stream,
  size_t count,
  size_t *offset)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
int report_size = stream->oa_buffer.format_size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
@@ -1077,7 +1077,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
size_t count,
size_t *offset)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
u32 oastatus1;
int ret;
 
@@ -1352,8 +1352,8 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
 
free_oa_buffer(stream);
 
-   intel_uncore_forcewake_put(stream->gt->uncore, FORCEWAKE_ALL);
-   intel_runtime_pm_put(stream->gt->uncore->rpm, stream->wakeref);
+   intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
+   intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref);
 
if (stream->ctx)
oa_put_render_ctx_id(stream);
@@ -1368,7 +1368,7 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
 
 static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
unsigned long flags;
 
@@ -1416,7 +1416,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream 
*stream)
 
 static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
unsigned long flags;
 
@@ -1565,7 +1565,7 @@ static void delay_after_mux(void)
 
 static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
const struct i915_oa_config *oa_config = stream->oa_config;
 
/*
@@ -1594,7 +1594,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream 
*stream)
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
 {
-   struct intel_uncore *uncore = stream->gt->uncore;
+   struct intel_uncore *uncore = stream->uncore;
 
intel_uncore_rmw(uncore, GEN6_UCGCTL1,
 GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0

[Intel-gfx] [PATCH 04/10] drm/i915/execlists: Force preemption

2019-10-10 Thread Chris Wilson
If the preempted context takes too long to relinquish control, e.g. it
is stuck inside a shader with arbitration disabled, evict that context
with an engine reset. This ensures that preemptions are reasonably
responsive, providing a tighter QoS for the more important context at
the cost of flagging unresponsive contexts more frequently (i.e. instead
of using an ~10s hangcheck, we now evict at ~100ms).  The challenge
lies in picking a timeout that can be reasonably serviced by HW for
typical workloads, balancing the existing clients against the needs for
responsiveness.

Note that coupled with timeslicing, this will lead to rapid GPU "hang"
detection with multiple active contexts vying for GPU time.

The preempt timeout can be adjusted per-engine using,

/sys/class/drm/card?/engine/*/preempt_timeout_ms

v2: Couple in sysfs control of preemption timeout

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Tvrtko Ursulin 
Reviewed-by: Mika Kuoppala 
---
 drivers/gpu/drm/i915/Kconfig.profile | 15 
 drivers/gpu/drm/i915/gt/intel_engine_cs.c|  2 +
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 32 +++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c  | 95 ++--
 drivers/gpu/drm/i915/i915_params.h   |  2 +-
 6 files changed, 146 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile 
b/drivers/gpu/drm/i915/Kconfig.profile
index 48df8889a88a..8fceea85937b 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -25,3 +25,18 @@ config DRM_I915_SPIN_REQUEST
  May be 0 to disable the initial spin. In practice, we estimate
  the cost of enabling the interrupt (if currently disabled) to be
  a few microseconds.
+
+config DRM_I915_PREEMPT_TIMEOUT
+   int "Preempt timeout (ms)"
+   default 100 # milliseconds
+   help
+ How long to wait (in milliseconds) for a preemption event to occur
+ when submitting a new context via execlists. If the current context
+ does not hit an arbitration point and yield to HW before the timer
+ expires, the HW will be reset to allow the more important context
+ to execute.
+
+ This is adjustable via
+ /sys/class/drm/card?/engine/*/preempt_timeout_ms
+
+ May be 0 to disable the timeout.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c9d639c6becb..1eb51147839a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -304,6 +304,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id)
engine->instance = info->instance;
__sprint_engine_name(engine);
 
+   engine->props.preempt_timeout = CONFIG_DRM_I915_PREEMPT_TIMEOUT;
+
/*
 * To be overridden by the backend on setup. However to facilitate
 * cleanup on error during setup, we always provide the destroy vfunc.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
index cbe9ec59beeb..aac26097c916 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -45,10 +45,37 @@ mmio_show(struct kobject *kobj, struct kobj_attribute 
*attr, char *buf)
return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base);
 }
 
+static ssize_t
+preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr,
+char *buf)
+{
+   struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+   return sprintf(buf, "%lu\n", engine->props.preempt_timeout);
+}
+
+static ssize_t
+preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+   struct intel_engine_cs *engine = kobj_to_engine(kobj);
+   unsigned long timeout;
+   int err;
+
+   err = kstrtoul(buf, 0, );
+   if (err)
+   return err;
+
+   engine->props.preempt_timeout = timeout;
+   return count;
+}
+
 static struct kobj_attribute name_attr = __ATTR(name, 0444, name_show, NULL);
 static struct kobj_attribute class_attr = __ATTR(class, 0444, class_show, 
NULL);
 static struct kobj_attribute inst_attr = __ATTR(instance, 0444, inst_show, 
NULL);
 static struct kobj_attribute mmio_attr = __ATTR(mmio_base, 0444, mmio_show, 
NULL);
+static struct kobj_attribute preempt_timeout_attr =
+__ATTR(preempt_timeout_ms, 0600, preempt_timeout_show, preempt_timeout_store);
 
 static void kobj_engine_release(struct kobject *kobj)
 {
@@ -109,6 +136,11 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
if (sysfs_create_files(kobj, files))
goto err_engine;
 
+   if (CONFIG_DRM_I915_PREEMPT_TIMEOUT

[Intel-gfx] [PATCH 08/10] drm/i915: Cancel non-persistent contexts on close

2019-10-10 Thread Chris Wilson
Normally, we rely on our hangcheck to prevent persistent batches from
hogging the GPU. However, if the user disables hangcheck, this mechanism
breaks down. Despite our insistence that this is unsafe, the users are
equally insistent that they want to use endless batches and will disable
the hangcheck mechanism. We are looking at perhaps replacing hangcheck
with a softer mechanism, that sends a pulse down the engine to check if
it is well. We can use the same preemptive pulse to flush an active
persistent context off the GPU upon context close, preventing resources
being lost and unkillable requests remaining on the GPU after process
termination. To avoid changing the ABI and accidentally breaking
existing userspace, we make the persistence of a context explicit and
enable it by default (matching current ABI). Userspace can opt out of
persistent mode (forcing requests to be cancelled when the context is
closed by process termination or explicitly) by a context parameter. To
facilitate existing use-cases of disabling hangcheck, if the modparam is
disabled (i915.enable_hangcheck=0), we disable persistence mode by
default.  (Note, one of the outcomes for supporting endless mode will be
the removal of hangchecking, at which point opting into persistent mode
will be mandatory, or maybe the default perhaps controlled by cgroups.)

v2: Check for hangchecking at context termination, so that we are not
left with undying contexts from a crafty user.

Testcase: igt/gem_ctx_persistence
Signed-off-by: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Michał Winiarski 
Cc: Jon Bloomfield 
Reviewed-by: Jon Bloomfield 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 132 ++
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 ++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
 .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
 include/uapi/drm/i915_drm.h   |  15 ++
 5 files changed, 165 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5d8221c7ba83..46e5b3b53288 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -70,6 +70,7 @@
 #include 
 
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
 
 #include "i915_gem_context.h"
@@ -269,6 +270,78 @@ void i915_gem_context_release(struct kref *ref)
schedule_work(>free_work);
 }
 
+static inline struct i915_gem_engines *
+__context_engines_static(struct i915_gem_context *ctx)
+{
+   return rcu_dereference_protected(ctx->engines, true);
+}
+
+static void kill_context(struct i915_gem_context *ctx)
+{
+   intel_engine_mask_t tmp, active, reset;
+   struct intel_gt *gt = >i915->gt;
+   struct i915_gem_engines_iter it;
+   struct intel_engine_cs *engine;
+   struct intel_context *ce;
+
+   /*
+* If we are already banned, it was due to a guilty request causing
+* a reset and the entire context being evicted from the GPU.
+*/
+   if (i915_gem_context_is_banned(ctx))
+   return;
+
+   i915_gem_context_set_banned(ctx);
+
+   /*
+* Map the user's engine back to the actual engines; one virtual
+* engine will be mapped to multiple engines, and using ctx->engine[]
+* the same engine may have multiple instances in the user's map.
+* However, we only care about pending requests, so only include
+* engines on which there are incomplete requests.
+*/
+   active = 0;
+   for_each_gem_engine(ce, __context_engines_static(ctx), it) {
+   struct dma_fence *fence;
+
+   if (!ce->timeline)
+   continue;
+
+   fence = i915_active_fence_get(>timeline->last_request);
+   if (!fence)
+   continue;
+
+   engine = to_request(fence)->engine;
+   if (HAS_EXECLISTS(gt->i915))
+   engine = intel_context_inflight(ce);
+   if (engine)
+   active |= engine->mask;
+
+   dma_fence_put(fence);
+   }
+
+   /*
+* Send a "high priority pulse" down the engine to cause the
+* current request to be momentarily preempted. (If it fails to
+* be preempted, it will be reset). As we have marked our context
+* as banned, any incomplete request, including any running, will
+* be skipped following the preemption.
+*/
+   reset = 0;
+   for_each_engine_masked(engine, gt->i915, active, tmp)
+   if (intel_engine_pulse(engine))
+   reset |= engine->mask;
+
+   /*
+* If we are unable to send a preemptive pulse to bump
+* the context from the GPU, we have to resort to a full
+

[Intel-gfx] [PATCH 02/10] drm/i915/execlists: Leave tell-tales as to why pending[] is bad

2019-10-10 Thread Chris Wilson
Before we BUG out with bad pending state, leave a telltale as to which
test failed.

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 30 -
 drivers/gpu/drm/i915/i915_gem.h |  8 
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a0777b3ad68a..5040fbdd81af 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1138,25 +1138,45 @@ assert_pending_valid(const struct 
intel_engine_execlists *execlists,
 
trace_ports(execlists, msg, execlists->pending);
 
-   if (!execlists->pending[0])
+   if (!execlists->pending[0]) {
+   GEM_TRACE_ERR("Nothing pending for promotion!\n");
return false;
+   }
 
-   if (execlists->pending[execlists_num_ports(execlists)])
+   if (execlists->pending[execlists_num_ports(execlists)]) {
+   GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
+ execlists_num_ports(execlists));
return false;
+   }
 
for (port = execlists->pending; (rq = *port); port++) {
-   if (ce == rq->hw_context)
+   if (ce == rq->hw_context) {
+   GEM_TRACE_ERR("Duplicate context in pending[%zd]\n",
+ port - execlists->pending);
return false;
+   }
 
ce = rq->hw_context;
if (i915_request_completed(rq))
continue;
 
-   if (i915_active_is_idle(>active))
+   if (i915_active_is_idle(>active)) {
+   GEM_TRACE_ERR("Inactive context in pending[%zd]\n",
+ port - execlists->pending);
+   return false;
+   }
+
+   if (!i915_vma_is_pinned(ce->state)) {
+   GEM_TRACE_ERR("Unpinned context in pending[%zd]\n",
+ port - execlists->pending);
return false;
+   }
 
-   if (!i915_vma_is_pinned(ce->state))
+   if (!i915_vma_is_pinned(ce->ring->vma)) {
+   GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n",
+ port - execlists->pending);
return false;
+   }
}
 
return ce;
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 6795f1daa3d5..63dab3765106 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -37,10 +37,8 @@ struct drm_i915_private;
 #define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER)
 
 #define GEM_BUG_ON(condition) do { if (unlikely((condition))) {\
-   pr_err("%s:%d GEM_BUG_ON(%s)\n", \
-  __func__, __LINE__, __stringify(condition)); \
-   GEM_TRACE("%s:%d GEM_BUG_ON(%s)\n", \
- __func__, __LINE__, __stringify(condition)); \
+   GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \
+ __func__, __LINE__, __stringify(condition)); \
BUG(); \
} \
} while(0)
@@ -66,11 +64,13 @@ struct drm_i915_private;
 
 #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM)
 #define GEM_TRACE(...) trace_printk(__VA_ARGS__)
+#define GEM_TRACE_ERR(...) do { pr_err(__VA_ARGS__); 
trace_printk(__VA_ARGS__); } while (0)
 #define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL)
 #define GEM_TRACE_DUMP_ON(expr) \
do { if (expr) ftrace_dump(DUMP_ALL); } while (0)
 #else
 #define GEM_TRACE(...) do { } while (0)
+#define GEM_TRACE_ERR(...) do { } while (0)
 #define GEM_TRACE_DUMP() do { } while (0)
 #define GEM_TRACE_DUMP_ON(expr) BUILD_BUG_ON_INVALID(expr)
 #endif
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 10/10] drm/i915: Flush idle barriers when waiting

2019-10-10 Thread Chris Wilson
If we do find ourselves with an idle barrier inside our active while
waiting, attempt to flush it by emitting a pulse using the kernel
context.

Signed-off-by: Chris Wilson 
---
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 14 +
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  1 +
 drivers/gpu/drm/i915/i915_active.c| 21 +--
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index f68acf9118f3..e27bb7f028bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -169,3 +169,17 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
intel_engine_pm_put(engine);
return err;
 }
+
+int intel_engine_flush_barriers(struct intel_engine_cs *engine)
+{
+   struct i915_request *rq;
+
+   rq = i915_request_create(engine->kernel_context);
+   if (IS_ERR(rq))
+   return PTR_ERR(rq);
+
+   idle_pulse(engine, rq);
+   i915_request_add(rq);
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
index 39391004554d..0c1ad0fc091d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -15,5 +15,6 @@ void intel_engine_park_heartbeat(struct intel_engine_cs 
*engine);
 void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
 
 int intel_engine_pulse(struct intel_engine_cs *engine);
+int intel_engine_flush_barriers(struct intel_engine_cs *engine);
 
 #endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/i915_active.c 
b/drivers/gpu/drm/i915/i915_active.c
index aa37c07004b9..98d5fe1c7e19 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -6,6 +6,7 @@
 
 #include 
 
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_pm.h"
 
 #include "i915_drv.h"
@@ -435,6 +436,21 @@ static void enable_signaling(struct i915_active_fence 
*active)
dma_fence_put(fence);
 }
 
+static int flush_barrier(struct active_node *it)
+{
+   struct intel_engine_cs *engine;
+
+   if (!is_barrier(>base))
+   return 0;
+
+   engine = __barrier_to_engine(it);
+   smp_rmb(); /* serialise with add_active_barriers */
+   if (!is_barrier(>base))
+   return 0;
+
+   return intel_engine_flush_barriers(engine);
+}
+
 int i915_active_wait(struct i915_active *ref)
 {
struct active_node *it, *n;
@@ -448,8 +464,9 @@ int i915_active_wait(struct i915_active *ref)
/* Flush lazy signals */
enable_signaling(>excl);
rbtree_postorder_for_each_entry_safe(it, n, >tree, node) {
-   if (is_barrier(>base)) /* unconnected idle barrier */
-   continue;
+   err = flush_barrier(it); /* unconnected idle barrier? */
+   if (err)
+   break;
 
enable_signaling(>base);
}
-- 
2.23.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 07/10] drm/i915/execlists: Cancel banned contexts on schedule-out

2019-10-10 Thread Chris Wilson
On completion of a banned context, scrub the context image so that we do
not replay the active payload. The intent is that we skip banned
payloads on request submission so that the timeline advancement
continues on in the background. However, if we are returning to a
preempted request, i915_request_skip() is ineffective and instead we
need to patch up the context image so that it continues from the start
of the next request.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c|  58 ++
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 273 +
 2 files changed, 331 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index eb99f1e804f7..79c7ebea2fcc 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
 const struct intel_engine_cs *engine,
 const struct intel_ring *ring,
 bool close);
+static void
+__execlists_update_reg_state(const struct intel_context *ce,
+const struct intel_engine_cs *engine);
 
 static void __context_pin_acquire(struct intel_context *ce)
 {
@@ -1022,6 +1025,58 @@ static void kick_siblings(struct i915_request *rq, 
struct intel_context *ce)
tasklet_schedule(>base.execlists.tasklet);
 }
 
+static void
+mark_complete(struct i915_request *rq, struct intel_engine_cs *engine)
+{
+   const struct intel_timeline * const tl = rcu_dereference(rq->timeline);
+
+   *(u32 *)tl->hwsp_seqno = rq->fence.seqno;
+   GEM_BUG_ON(!i915_request_completed(rq));
+
+   list_for_each_entry_from_reverse(rq, >requests, link) {
+   if (i915_request_signaled(rq))
+   break;
+
+   mark_eio(rq);
+   }
+
+   intel_engine_queue_breadcrumbs(engine);
+}
+
+static void cancel_active(struct i915_request *rq,
+ struct intel_engine_cs *engine)
+{
+   struct intel_context * const ce = rq->hw_context;
+   u32 *regs = ce->lrc_reg_state;
+
+   if (i915_request_completed(rq))
+   return;
+
+   GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
+ __func__, engine->name, rq->fence.context, rq->fence.seqno);
+   __context_pin_acquire(ce);
+
+   /* Scrub the context image to prevent replaying the previous batch */
+   memcpy(regs, /* skip restoring the vanilla PPHWSP */
+  engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+  engine->context_size - PAGE_SIZE);
+   execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+
+   /* Ring will be advanced on retire; here we need to reset the context */
+   ce->ring->head = intel_ring_wrap(ce->ring, rq->wa_tail);
+   __execlists_update_reg_state(ce, engine);
+
+   /* We've switched away, so this should be a no-op, but intent matters */
+   ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+
+   /* Let everyone know that the request may now be retired */
+   rcu_read_lock();
+   mark_complete(rq, engine);
+   rcu_read_unlock();
+
+   __context_pin_release(ce);
+}
+
 static inline void
 __execlists_schedule_out(struct i915_request *rq,
 struct intel_engine_cs * const engine)
@@ -1032,6 +1087,9 @@ __execlists_schedule_out(struct i915_request *rq,
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
intel_gt_pm_put(engine->gt);
 
+   if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+   cancel_active(rq, engine);
+
/*
 * If this is part of a virtual engine, its next request may
 * have been blocked waiting for access to the active context.
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 198cf2f754f4..1703130ef0ef 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -7,6 +7,7 @@
 #include <linux/prime_numbers.h>
 
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_reset.h"
 
 #include "i915_selftest.h"
@@ -986,6 +987,277 @@ static int live_nopreempt(void *arg)
goto err_client_b;
 }
 
+struct live_preempt_cancel {
+   struct intel_engine_cs *engine;
+   struct preempt_client a, b;
+};
+
+static int __cancel_active0(struct live_preempt_cancel *arg)
+{
+   struct i915_request *rq;
+   struct igt_live_test t;
+   int err;
+
+   /* Preempt cancel of ELSP0 */
+   GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+
+   if (igt_live_test_begin(&t, arg->engine->i915,
+   __func__, arg->engine->name))
+   return -EIO;
+
+   

[Intel-gfx] [PATCH 03/10] drm/i915: Expose engine properties via sysfs

2019-10-10 Thread Chris Wilson
Preliminary stub to add engines underneath /sys/class/drm/cardN/, so
that we can expose properties on each engine to the sysadmin.

To start with we have basic analogues of the i915_query ioctl so that we
can pretty print engine discovery from the shell, and flesh out the
directory structure. Later we will add writeable sysadmin properties such
as per-engine timeout controls.

An example tree of the engine properties on Braswell:
/sys/class/drm/card0
└── engine
    ├── bcs0
    │   ├── class
    │   ├── heartbeat_interval_ms
    │   ├── instance
    │   ├── mmio_base
    │   └── name
    ├── rcs0
    │   ├── class
    │   ├── heartbeat_interval_ms
    │   ├── instance
    │   ├── mmio_base
    │   └── name
    ├── vcs0
    │   ├── class
    │   ├── heartbeat_interval_ms
    │   ├── instance
    │   ├── mmio_base
    │   └── name
    └── vecs0
    ├── class
    ├── heartbeat_interval_ms
    ├── instance
    ├── mmio_base
    └── name

Signed-off-by: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Tvrtko Ursulin 
Cc: Daniele Ceraolo Spurio 
Cc: Rodrigo Vivi 
Acked-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/Makefile|   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 119 +++
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h |  14 +++
 drivers/gpu/drm/i915/i915_sysfs.c|   3 +
 4 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e791d9323b51..cd9a10ba2516 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -78,8 +78,9 @@ gt-y += \
gt/intel_breadcrumbs.o \
gt/intel_context.o \
gt/intel_engine_cs.o \
-   gt/intel_engine_pool.o \
gt/intel_engine_pm.o \
+   gt/intel_engine_pool.o \
+   gt/intel_engine_sysfs.o \
gt/intel_engine_user.o \
gt/intel_gt.o \
gt/intel_gt_irq.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
new file mode 100644
index ..cbe9ec59beeb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -0,0 +1,119 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_engine_sysfs.h"
+
+struct kobj_engine {
+   struct kobject base;
+   struct intel_engine_cs *engine;
+};
+
+static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj)
+{
+   return container_of(kobj, struct kobj_engine, base)->engine;
+}
+
+static ssize_t
+name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name);
+}
+
+static ssize_t
+class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class);
+}
+
+static ssize_t
+inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance);
+}
+
+static ssize_t
+mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base);
+}
+
+static struct kobj_attribute name_attr = __ATTR(name, 0444, name_show, NULL);
+static struct kobj_attribute class_attr = __ATTR(class, 0444, class_show, 
NULL);
+static struct kobj_attribute inst_attr = __ATTR(instance, 0444, inst_show, 
NULL);
+static struct kobj_attribute mmio_attr = __ATTR(mmio_base, 0444, mmio_show, 
NULL);
+
+static void kobj_engine_release(struct kobject *kobj)
+{
+   kfree(kobj);
+}
+
+static struct kobj_type kobj_engine_type = {
+   .release = kobj_engine_release,
+   .sysfs_ops = &kobj_sysfs_ops
+};
+
+static struct kobject *
+kobj_engine(struct kobject *dir, struct intel_engine_cs *engine)
+{
+   struct kobj_engine *ke;
+
+   ke = kzalloc(sizeof(*ke), GFP_KERNEL);
+   if (!ke)
+   return NULL;
+
+   kobject_init(&ke->base, &kobj_engine_type);
+   ke->engine = engine;
+
+   if (kobject_add(&ke->base, dir, "%s", engine->name)) {
+   kobject_put(&ke->base);
+   return NULL;
+   }
+
+   /* xfer ownership to sysfs tree */
+   return &ke->base;
+}
+
+void intel_engines_add_sysfs(struct drm_i915_private *i915)
+{
+   static const struct attribute *files[] = {
+   &name_attr.attr,
+   &class_attr.attr,
+   &inst_attr.attr,
+   &mmio_attr.attr,
+   NULL
+   };
+
+   struct device *kdev = i915->drm.primary->kdev;
+   

  1   2   3   4   5   6   7   8   9   10   >