[Intel-gfx] [PATCH v4 3/7] drm/i915: Add a batch pool debugfs file
From: Brad Volkin bradley.d.vol...@intel.com It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 41 + 1 file changed, 41 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 319da61..efdd59a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -582,6 +582,46 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) return 0; } +static int i915_gem_batch_pool_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m-private; + struct drm_device *dev = node-minor-dev; + struct drm_i915_private *dev_priv = dev-dev_private; + struct drm_i915_gem_object *obj; + int count = 0; + int ret; + + ret = mutex_lock_interruptible(dev-struct_mutex); + if (ret) + return ret; + + seq_puts(m, active:\n); + list_for_each_entry(obj, + dev_priv-mm.batch_pool.active_list, + batch_pool_list) { + seq_puts(m,); + describe_obj(m, obj); + seq_putc(m, '\n'); + count++; + } + + seq_puts(m, inactive:\n); + list_for_each_entry(obj, + dev_priv-mm.batch_pool.inactive_list, + batch_pool_list) { + seq_puts(m,); + describe_obj(m, obj); + seq_putc(m, '\n'); + count++; + } + + seq_printf(m, total: %d\n, count); + + mutex_unlock(dev-struct_mutex); + + return 0; +} + static int i915_gem_request_info(struct seq_file *m, void *data) { struct drm_info_node *node = m-private; @@ -4262,6 +4302,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {i915_gem_hws_blt, i915_hws_info, 0, (void *)BCS}, {i915_gem_hws_bsd, i915_hws_info, 0, (void *)VCS}, {i915_gem_hws_vebox, i915_hws_info, 0, (void *)VECS}, + {i915_gem_batch_pool, i915_gem_batch_pool_info, 0}, {i915_frequency_info, i915_frequency_info, 0}, {i915_drpc_info, i915_drpc_info, 0}, {i915_emon_status, i915_emon_status, 0}, -- 1.9.1 
___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 4/7] drm/i915: Add batch pool details to i915_gem_objects debugfs
From: Brad Volkin bradley.d.vol...@intel.com To better account for the potentially large memory consumption of the batch pool. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 45 + 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index efdd59a..60d5ceb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -360,6 +360,38 @@ static int per_file_stats(int id, void *ptr, void *data) return 0; } +#define print_file_stats(m, name, stats) \ + seq_printf(m, %s: %u objects, %zu bytes (%zu active, %zu inactive, %zu global, %zu shared, %zu unbound)\n, \ + name, \ + stats.count, \ + stats.total, \ + stats.active, \ + stats.inactive, \ + stats.global, \ + stats.shared, \ + stats.unbound) + +static void print_batch_pool_stats(struct seq_file *m, + struct drm_i915_private *dev_priv) +{ + struct drm_i915_gem_object *obj; + struct file_stats stats; + + memset(stats, 0, sizeof(stats)); + + list_for_each_entry(obj, + dev_priv-mm.batch_pool.active_list, + batch_pool_list) + per_file_stats(0, obj, stats); + + list_for_each_entry(obj, + dev_priv-mm.batch_pool.inactive_list, + batch_pool_list) + per_file_stats(0, obj, stats); + + print_file_stats(m, batch pool, stats); +} + #define count_vmas(list, member) do { \ list_for_each_entry(vma, list, member) { \ size += i915_gem_obj_ggtt_size(vma-obj); \ @@ -442,6 +474,9 @@ static int i915_gem_object_info(struct seq_file *m, void* data) dev_priv-gtt.mappable_end - dev_priv-gtt.base.start); seq_putc(m, '\n'); + print_batch_pool_stats(m, dev_priv); + + seq_putc(m, '\n'); list_for_each_entry_reverse(file, dev-filelist, lhead) { struct file_stats stats; struct task_struct *task; @@ -459,15 +494,7 @@ static int i915_gem_object_info(struct seq_file *m, void* data) */ rcu_read_lock(); task = pid_task(file-pid, PIDTYPE_PID); - seq_printf(m, %s: %u objects, %zu bytes (%zu active, 
%zu inactive, %zu global, %zu shared, %zu unbound)\n, - task ? task-comm : unknown, - stats.count, - stats.total, - stats.active, - stats.inactive, - stats.global, - stats.shared, - stats.unbound); + print_file_stats(m, task ? task-comm : unknown, stats); rcu_read_unlock(); } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 6/7] drm/i915: Mark shadow batch buffers as purgeable
From: Brad Volkin bradley.d.vol...@intel.com By adding a new exec_entry flag, we cleanly mark the shadow objects as purgeable after they are on the active list. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 20835b8..a271bc0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -37,6 +37,7 @@ #define __EXEC_OBJECT_HAS_FENCE (130) #define __EXEC_OBJECT_NEEDS_MAP (129) #define __EXEC_OBJECT_NEEDS_BIAS (128) +#define __EXEC_OBJECT_PURGEABLE (127) #define BATCH_OFFSET_BIAS (256*1024) @@ -223,7 +224,12 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) if (entry-flags __EXEC_OBJECT_HAS_PIN) vma-pin_count--; - entry-flags = ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); + if (entry-flags __EXEC_OBJECT_PURGEABLE) + obj-madv = I915_MADV_DONTNEED; + + entry-flags = ~(__EXEC_OBJECT_HAS_FENCE | + __EXEC_OBJECT_HAS_PIN | + __EXEC_OBJECT_PURGEABLE); } static void eb_destroy(struct eb_vmas *eb) @@ -1373,6 +1379,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } + shadow_batch_obj-madv = I915_MADV_WILLNEED; + ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0); if (ret) goto err; @@ -1396,6 +1404,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, vma = i915_gem_obj_to_ggtt(shadow_batch_obj); vma-exec_entry = shadow_exec_entry; + vma-exec_entry-flags = __EXEC_OBJECT_PURGEABLE; drm_gem_object_reference(shadow_batch_obj-base); list_add_tail(vma-exec_list, eb-vmas); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 0/7] Command parser batch buffer copy
From: Brad Volkin bradley.d.vol...@intel.com This is v4 of the series I sent here: http://lists.freedesktop.org/archives/intel-gfx/2014-November/054733.html This version incorporates most of the feedback from v3. The couple of things that I missed (mostly for timing reasons) are: * Move 'pending_read_domains |= I915_GEM_DOMAIN_COMMAND' after the parser * Maybe remove the memsets from the batch copy function * Today's feedback from Chris and Daniel re: madv. I'd suggest that the first two could be small follow-up patches, and the madv changes I did based on Daniel's earlier comments were pulled into a separate patch that could be rewritten or modified as needed. Brad Volkin (7): drm/i915: Implement a framework for batch buffer pools drm/i915: Use batch pools with the command parser drm/i915: Add a batch pool debugfs file drm/i915: Add batch pool details to i915_gem_objects debugfs drm/i915: Use batch length instead of object size in command parser drm/i915: Mark shadow batch buffers as purgeable drm/i915: Tidy up execbuffer command parsing code Documentation/DocBook/drm.tmpl | 5 + drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_cmd_parser.c | 97 ++ drivers/gpu/drm/i915/i915_debugfs.c| 86 ++-- drivers/gpu/drm/i915/i915_dma.c| 1 + drivers/gpu/drm/i915/i915_drv.h| 24 + drivers/gpu/drm/i915/i915_gem.c| 3 + drivers/gpu/drm/i915/i915_gem_batch_pool.c | 152 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 105 9 files changed, 430 insertions(+), 44 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 2/7] drm/i915: Use batch pools with the command parser
From: Brad Volkin bradley.d.vol...@intel.com This patch sets up all of the tracking and copying necessary to use batch pools with the command parser and dispatches the copied (shadow) batch to the hardware. After this patch, the parser is in 'enabling' mode. Note that performance takes a hit from the copy in some cases and will likely need some work. At a rough pass, the memcpy appears to be the bottleneck. Without having done a deeper analysis, two ideas that come to mind are: 1) Copy sections of the batch at a time, as they are reached by parsing. Might improve cache locality. 2) Copy only up to the userspace-supplied batch length and memset the rest of the buffer. Reduces the number of reads. v2: - Remove setting the capacity of the pool - One global pool instead of per-ring pools - Replace batch_obj with shadow_batch_obj and hook into eb-vmas - Memset any space in the shadow batch beyond what gets copied - Rebased on execlist prep refactoring v3: - Rebase on chained batch handling - Squash in setting the secure dispatch flag - Add a note about the interaction w/secure dispatch pinning - Check for request-batch_obj == NULL in i915_gem_free_request v4: - Fix read domains for shadow_batch_obj - Remove the set_to_gtt_domain call from i915_parse_cmds - ggtt_pin/unpin in the parser block to simplify error handling - Check USES_FULL_PPGTT before setting DISPATCH_SECURE flag - Remove i915_gem_batch_pool_put calls Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 79 +++--- drivers/gpu/drm/i915/i915_dma.c| 1 + drivers/gpu/drm/i915/i915_drv.h| 8 +++ drivers/gpu/drm/i915/i915_gem.c| 2 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 49 -- 5 files changed, 117 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 809bb95..5a3f4e4 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -838,6 +838,56 @@ finish: 
return (u32*)addr; } +/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */ +static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, + struct drm_i915_gem_object *src_obj) +{ + int ret = 0; + int needs_clflush = 0; + u32 *src_addr, *dest_addr = NULL; + + ret = i915_gem_obj_prepare_shmem_read(src_obj, needs_clflush); + if (ret) { + DRM_DEBUG_DRIVER(CMD: failed to prep read\n); + return ERR_PTR(ret); + } + + src_addr = vmap_batch(src_obj); + if (!src_addr) { + DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n); + ret = -ENOMEM; + goto unpin_src; + } + + if (needs_clflush) + drm_clflush_virt_range((char *)src_addr, src_obj-base.size); + + ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); + if (ret) { + DRM_DEBUG_DRIVER(CMD: Failed to set batch CPU domain\n); + goto unmap_src; + } + + dest_addr = vmap_batch(dest_obj); + if (!dest_addr) { + DRM_DEBUG_DRIVER(CMD: Failed to vmap shadow batch\n); + ret = -ENOMEM; + goto unmap_src; + } + + memcpy(dest_addr, src_addr, src_obj-base.size); + if (dest_obj-base.size src_obj-base.size) + memset((u8 *)dest_addr + src_obj-base.size, 0, + dest_obj-base.size - src_obj-base.size); + +unmap_src: + vunmap(src_addr); +unpin_src: + i915_gem_object_unpin_pages(src_obj); + + return ret ? ERR_PTR(ret) : dest_addr; +} + /** * i915_needs_cmd_parser() - should a given ring use software command parsing? * @ring: the ring in question @@ -954,6 +1004,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, * i915_parse_cmds() - parse a submitted batch buffer for privilege violations * @ring: the ring on which the batch is to execute * @batch_obj: the batch buffer in question + * @shadow_batch_obj: copy of the batch buffer in question * @batch_start_offset: byte offset in the batch at which execution starts * @is_master: is the submitting process the drm master? 
* @@ -965,32 +1016,28 @@ static bool check_cmd(const struct intel_engine_cs *ring, */ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, u32 batch_start_offset, bool is_master) { int ret = 0; u32 *cmd, *batch_base, *batch_end; struct drm_i915_cmd_descriptor default_desc = { 0 }; - int needs_clflush = 0; bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ - ret = i915_gem_obj_prepare_shmem_read(batch_obj, needs_clflush); - if (ret) { -
[Intel-gfx] [PATCH v4 1/7] drm/i915: Implement a framework for batch buffer pools
From: Brad Volkin bradley.d.vol...@intel.com This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- Documentation/DocBook/drm.tmpl | 5 + drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h| 15 +++ drivers/gpu/drm/i915/i915_gem.c| 1 + drivers/gpu/drm/i915/i915_gem_batch_pool.c | 152 + 5 files changed, 174 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 7277a7f..29bc8f5 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3989,6 +3989,11 @@ int num_ioctls;/synopsis !Idrivers/gpu/drm/i915/i915_cmd_parser.c /sect2 sect2 +titleBatchbuffer Pools/title +!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool +!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c + /sect2 + sect2 titleLogical Rings, Logical Ring Contexts and Execlists/title !Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and Execlists 
!Idrivers/gpu/drm/i915/intel_lrc.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 891e584..73cd2d7 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o # GEM code i915-y += i915_cmd_parser.o \ + i915_gem_batch_pool.o \ i915_gem_context.o \ i915_gem_render_state.o \ i915_gem_debug.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8fb8eba..2955ed9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1134,6 +1134,12 @@ struct intel_l3_parity { int which_slice; }; +struct i915_gem_batch_pool { + struct drm_device *dev; + struct list_head active_list; + struct list_head inactive_list; +}; + struct i915_gem_mm { /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; @@ -1865,6 +1871,8 @@ struct drm_i915_gem_object { /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; + struct list_head batch_pool_list; + /** * This is set if the object is on the active lists (has pending * rendering and so a non-zero seqno), and is not set if it i s on @@ -2829,6 +2837,13 @@ void i915_destroy_error_state(struct drm_device *dev); void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone); const char *i915_cache_level_str(struct drm_i915_private *i915, int type); +/* i915_gem_batch_pool.c */ +void i915_gem_batch_pool_init(struct drm_device *dev, + struct i915_gem_batch_pool *pool); +void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); +struct drm_i915_gem_object* +i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size); + /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(void); int i915_cmd_parser_init_ring(struct intel_engine_cs *ring); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3e0cabe..875c151 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ 
b/drivers/gpu/drm/i915/i915_gem.c @@ -4352,6 +4352,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(obj-ring_list); INIT_LIST_HEAD(obj-obj_exec_link); INIT_LIST_HEAD(obj-vma_list); + INIT_LIST_HEAD(obj-batch_pool_list); obj-ops = ops; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c new file mode 100644 index 000..a55e43b --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -0,0 +1,152 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the
[Intel-gfx] [PATCH v4 5/7] drm/i915: Use batch length instead of object size in command parser
From: Brad Volkin bradley.d.vol...@intel.com Previously we couldn't trust the user-supplied batch length because it came directly from userspace (i.e. untrusted code). It would have affected what commands software parsed without regard to what hardware would actually execute, leaving a potential hole. With the parser now copying the user supplied batch buffer and writing MI_NOP commands to any space after the copied region, we can safely use the batch length input. This should be a performance win as the actual batch length is frequently much smaller than the allocated object size. v2: Fix handling of non-zero batch_start_offset Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 48 -- drivers/gpu/drm/i915/i915_drv.h| 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 1 + 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 5a3f4e4..30b3163 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -840,11 +840,19 @@ finish: /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, - struct drm_i915_gem_object *src_obj) + struct drm_i915_gem_object *src_obj, + u32 batch_start_offset, + u32 batch_len) { int ret = 0; int needs_clflush = 0; - u32 *src_addr, *dest_addr = NULL; + u32 *src_base, *dest_base = NULL; + u32 *src_addr, *dest_addr; + u32 offset = batch_start_offset / sizeof(*dest_addr); + u32 end = batch_start_offset + batch_len; + + if (end dest_obj-base.size || end src_obj-base.size) + return ERR_PTR(-E2BIG); ret = i915_gem_obj_prepare_shmem_read(src_obj, needs_clflush); if (ret) { @@ -852,15 +860,17 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, return ERR_PTR(ret); } - src_addr = vmap_batch(src_obj); - if (!src_addr) { + src_base = vmap_batch(src_obj); + if (!src_base) { 
DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n); ret = -ENOMEM; goto unpin_src; } + src_addr = src_base + offset; + if (needs_clflush) - drm_clflush_virt_range((char *)src_addr, src_obj-base.size); + drm_clflush_virt_range((char *)src_addr, batch_len); ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); if (ret) { @@ -868,24 +878,27 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, goto unmap_src; } - dest_addr = vmap_batch(dest_obj); - if (!dest_addr) { + dest_base = vmap_batch(dest_obj); + if (!dest_base) { DRM_DEBUG_DRIVER(CMD: Failed to vmap shadow batch\n); ret = -ENOMEM; goto unmap_src; } - memcpy(dest_addr, src_addr, src_obj-base.size); - if (dest_obj-base.size src_obj-base.size) - memset((u8 *)dest_addr + src_obj-base.size, 0, - dest_obj-base.size - src_obj-base.size); + dest_addr = dest_base + offset; + + if (batch_start_offset != 0) + memset((u8 *)dest_base, 0, batch_start_offset); + + memcpy(dest_addr, src_addr, batch_len); + memset((u8 *)dest_addr + batch_len, 0, dest_obj-base.size - end); unmap_src: - vunmap(src_addr); + vunmap(src_base); unpin_src: i915_gem_object_unpin_pages(src_obj); - return ret ? ERR_PTR(ret) : dest_addr; + return ret ? ERR_PTR(ret) : dest_base; } /** @@ -1006,6 +1019,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, * @batch_obj: the batch buffer in question * @shadow_batch_obj: copy of the batch buffer in question * @batch_start_offset: byte offset in the batch at which execution starts + * @batch_len: length of the commands in batch_obj * @is_master: is the submitting process the drm master? 
* * Parses the specified batch buffer looking for privilege violations as @@ -1018,6 +1032,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, struct drm_i915_gem_object *shadow_batch_obj, u32 batch_start_offset, + u32 batch_len, bool is_master) { int ret = 0; @@ -1025,7 +1040,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_cmd_descriptor default_desc = { 0 }; bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ - batch_base = copy_batch(shadow_batch_obj, batch_obj); + batch_base = copy_batch(shadow_batch_obj, batch_obj, +
[Intel-gfx] [PATCH v4 7/7] drm/i915: Tidy up execbuffer command parsing code
From: Brad Volkin bradley.d.vol...@intel.com Move it to a separate function since the main do_execbuffer function already has so much going on. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 136 + 1 file changed, 79 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a271bc0..58f0a6c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1026,6 +1026,75 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +static struct drm_i915_gem_object* +i915_gem_execbuffer_parse(struct intel_engine_cs *ring, + struct drm_i915_gem_exec_object2 *shadow_exec_entry, + struct eb_vmas *eb, + struct drm_i915_gem_object *batch_obj, + u32 batch_start_offset, + u32 batch_len, + bool is_master, + u32 *flags) +{ + struct drm_i915_private *dev_priv = to_i915(batch_obj-base.dev); + struct drm_i915_gem_object *shadow_batch_obj; + int ret; + + shadow_batch_obj = i915_gem_batch_pool_get(dev_priv-mm.batch_pool, + batch_obj-base.size); + if (IS_ERR(shadow_batch_obj)) + return shadow_batch_obj; + + shadow_batch_obj-madv = I915_MADV_WILLNEED; + + ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0); + if (ret) + goto err; + + ret = i915_parse_cmds(ring, + batch_obj, + shadow_batch_obj, + batch_start_offset, + batch_len, + is_master); + i915_gem_object_ggtt_unpin(shadow_batch_obj); + + if (ret) { + if (ret == -EACCES) + return batch_obj; + } else { + struct i915_vma *vma; + + memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); + + vma = i915_gem_obj_to_ggtt(shadow_batch_obj); + vma-exec_entry = shadow_exec_entry; + vma-exec_entry-flags = __EXEC_OBJECT_PURGEABLE; + drm_gem_object_reference(shadow_batch_obj-base); + list_add_tail(vma-exec_list, eb-vmas); + + shadow_batch_obj-base.pending_read_domains = + batch_obj-base.pending_read_domains; + + /* +* Set the DISPATCH_SECURE bit to 
remove the NON_SECURE +* bit from MI_BATCH_BUFFER_START commands issued in the +* dispatch_execbuffer implementations. We specifically +* don't want that set when the command parser is +* enabled. +* +* FIXME: with aliasing ppgtt, buffers that should only +* be in ggtt still end up in the aliasing ppgtt. remove +* this check when that is fixed. +*/ + if (USES_FULL_PPGTT(dev)) + *flags |= I915_DISPATCH_SECURE; + } + +err: + return ret ? ERR_PTR(ret) : shadow_batch_obj; +} + int i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, @@ -1242,7 +1311,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_i915_private *dev_priv = dev-dev_private; struct eb_vmas *eb; struct drm_i915_gem_object *batch_obj; - struct drm_i915_gem_object *shadow_batch_obj = NULL; struct drm_i915_gem_exec_object2 shadow_exec_entry; struct intel_engine_cs *ring; struct intel_context *ctx; @@ -1369,63 +1437,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, batch_obj-base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; if (i915_needs_cmd_parser(ring)) { - shadow_batch_obj = - i915_gem_batch_pool_get(dev_priv-mm.batch_pool, - batch_obj-base.size); - if (IS_ERR(shadow_batch_obj)) { - ret = PTR_ERR(shadow_batch_obj); - /* Don't try to clean up the obj in the error path */ - shadow_batch_obj = NULL; - goto err; - } - - shadow_batch_obj-madv = I915_MADV_WILLNEED; - - ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0); - if (ret) + batch_obj = i915_gem_execbuffer_parse(ring, + shadow_exec_entry, + eb, +
[Intel-gfx] [PATCH] tests/drv_hangman: skip a few asserts when using the cmd parser
From: Brad Volkin bradley.d.vol...@intel.com This test has a few checks that batch buffer addresses in the error state match the expected address for the userspace supplied batch. But the batch buffer copy piece of the command parser means that the logged addresses are actually _supposed_ to be different. So skip just those checks. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/drv_hangman.c | 43 +-- 1 file changed, 37 insertions(+), 6 deletions(-) I'm not sure it's actually worth doing any of the work beyond the 'if (bb_matched == 1)' block in check_error_state when the command parser is enabled. Here I've kept that work and just taken out the one check that would fail. But it seems like the work that's left is only there to enable the removed check, so maybe not needed. I'll defer to those that know more about this test. diff --git a/tests/drv_hangman.c b/tests/drv_hangman.c index 3d6b98b..8fbc2d3 100644 --- a/tests/drv_hangman.c +++ b/tests/drv_hangman.c @@ -36,6 +36,10 @@ #include igt_debugfs.h #include ioctl_wrappers.h +#ifndef I915_PARAM_CMD_PARSER_VERSION +#define I915_PARAM_CMD_PARSER_VERSION 28 +#endif + static int _read_sysfs(void *dst, int maxlen, const char* path, const char *fname) @@ -262,6 +266,7 @@ static void test_error_state_basic(void) } static void check_error_state(const int gen, + const bool uses_cmd_parser, const char *expected_ring_name, uint64_t expected_offset) { @@ -300,7 +305,8 @@ static void check_error_state(const int gen, char expected_line[32]; igt_assert(strstr(ring_name, expected_ring_name)); - igt_assert(gtt_offset == expected_offset); + if (!uses_cmd_parser) + igt_assert(gtt_offset == expected_offset); for (i = 0; i sizeof(batch) / 4; i++) { igt_assert(getline(line, line_size, file) 0); @@ -352,10 +358,12 @@ static void check_error_state(const int gen, i++; } } - if (gen = 4) - igt_assert(expected_addr == expected_offset); - else - igt_assert((expected_addr ~0x1) == expected_offset); + if (!uses_cmd_parser) { + 
if (gen = 4) + igt_assert(expected_addr == expected_offset); + else + igt_assert((expected_addr ~0x1) == expected_offset); + } ringbuf_ok = true; continue; } @@ -370,22 +378,45 @@ static void check_error_state(const int gen, close(debug_fd); } +static bool uses_cmd_parser(int fd, int gen) +{ + int parser_version = 0; + drm_i915_getparam_t gp; + int rc; + + gp.param = I915_PARAM_CMD_PARSER_VERSION; + gp.value = parser_version; + rc = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, gp); + if (rc || parser_version == 0) + return false; + + if (!gem_uses_aliasing_ppgtt(fd)) + return false; + + if (gen != 7) + return false; + + return true; +} + static void test_error_state_capture(unsigned ring_id, const char *ring_name) { int fd, gen; uint64_t offset; + bool cmd_parser; check_other_clients(); clear_error_state(); fd = drm_open_any(); gen = intel_gen(intel_get_drm_devid(fd)); + cmd_parser = uses_cmd_parser(fd, gen); offset = submit_batch(fd, ring_id, true); close(fd); - check_error_state(gen, ring_name, offset); + check_error_state(gen, cmd_parser, ring_name, offset); } static const struct target_ring { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 5/5] drm/i915: Use batch length instead of object size in command parser
From: Brad Volkin bradley.d.vol...@intel.com Previously we couldn't trust the user-supplied batch length because it came directly from userspace (i.e. untrusted code). It would have affected what commands software parsed without regard to what hardware would actually execute, leaving a potential hole. With the parser now copying the user supplied batch buffer and writing MI_NOP commands to any space after the copied region, we can safely use the batch length input. This should be a performance win as the actual batch length is frequently much smaller than the allocated object size. v2: Fix handling of non-zero batch_start_offset Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 48 -- drivers/gpu/drm/i915/i915_drv.h| 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 1 + 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index c8fe403..d4d13b1 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -840,11 +840,19 @@ finish: /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, - struct drm_i915_gem_object *src_obj) + struct drm_i915_gem_object *src_obj, + u32 batch_start_offset, + u32 batch_len) { int ret = 0; int needs_clflush = 0; - u32 *src_addr, *dest_addr = NULL; + u32 *src_base, *dest_base = NULL; + u32 *src_addr, *dest_addr; + u32 offset = batch_start_offset / sizeof(*dest_addr); + u32 end = batch_start_offset + batch_len; + + if (end dest_obj-base.size || end src_obj-base.size) + return ERR_PTR(-E2BIG); ret = i915_gem_obj_prepare_shmem_read(src_obj, needs_clflush); if (ret) { @@ -852,15 +860,17 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, return ERR_PTR(ret); } - src_addr = vmap_batch(src_obj); - if (!src_addr) { + src_base = vmap_batch(src_obj); + if (!src_base) { 
DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n); ret = -ENOMEM; goto unpin_src; } + src_addr = src_base + offset; + if (needs_clflush) - drm_clflush_virt_range((char *)src_addr, src_obj-base.size); + drm_clflush_virt_range((char *)src_addr, batch_len); ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); if (ret) { @@ -868,24 +878,27 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, goto unmap_src; } - dest_addr = vmap_batch(dest_obj); - if (!dest_addr) { + dest_base = vmap_batch(dest_obj); + if (!dest_base) { DRM_DEBUG_DRIVER(CMD: Failed to vmap shadow batch\n); ret = -ENOMEM; goto unmap_src; } - memcpy(dest_addr, src_addr, src_obj-base.size); - if (dest_obj-base.size src_obj-base.size) - memset((u8 *)dest_addr + src_obj-base.size, 0, - dest_obj-base.size - src_obj-base.size); + dest_addr = dest_base + offset; + + if (batch_start_offset != 0) + memset((u8 *)dest_base, 0, batch_start_offset); + + memcpy(dest_addr, src_addr, batch_len); + memset((u8 *)dest_addr + batch_len, 0, dest_obj-base.size - end); unmap_src: - vunmap(src_addr); + vunmap(src_base); unpin_src: i915_gem_object_unpin_pages(src_obj); - return ret ? ERR_PTR(ret) : dest_addr; + return ret ? ERR_PTR(ret) : dest_base; } /** @@ -1006,6 +1019,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, * @batch_obj: the batch buffer in question * @shadow_batch_obj: copy of the batch buffer in question * @batch_start_offset: byte offset in the batch at which execution starts + * @batch_len: length of the commands in batch_obj * @is_master: is the submitting process the drm master? 
* * Parses the specified batch buffer looking for privilege violations as @@ -1018,6 +1032,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, struct drm_i915_gem_object *shadow_batch_obj, u32 batch_start_offset, + u32 batch_len, bool is_master) { int ret = 0; @@ -1025,7 +1040,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_cmd_descriptor default_desc = { 0 }; bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ - batch_base = copy_batch(shadow_batch_obj, batch_obj); + batch_base = copy_batch(shadow_batch_obj, batch_obj, +
[Intel-gfx] [PATCH v3 3/5] drm/i915: Add a batch pool debugfs file
From: Brad Volkin bradley.d.vol...@intel.com It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 41 + 1 file changed, 41 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a79f83c..5f7cbed 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -582,6 +582,46 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) return 0; } +static int i915_gem_batch_pool_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m-private; + struct drm_device *dev = node-minor-dev; + struct drm_i915_private *dev_priv = dev-dev_private; + struct drm_i915_gem_object *obj; + int count = 0; + int ret; + + ret = mutex_lock_interruptible(dev-struct_mutex); + if (ret) + return ret; + + seq_puts(m, active:\n); + list_for_each_entry(obj, + dev_priv-mm.batch_pool.active_list, + batch_pool_list) { + seq_puts(m,); + describe_obj(m, obj); + seq_putc(m, '\n'); + count++; + } + + seq_puts(m, inactive:\n); + list_for_each_entry(obj, + dev_priv-mm.batch_pool.inactive_list, + batch_pool_list) { + seq_puts(m,); + describe_obj(m, obj); + seq_putc(m, '\n'); + count++; + } + + seq_printf(m, total: %d\n, count); + + mutex_unlock(dev-struct_mutex); + + return 0; +} + static int i915_gem_request_info(struct seq_file *m, void *data) { struct drm_info_node *node = m-private; @@ -4177,6 +4217,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {i915_gem_hws_blt, i915_hws_info, 0, (void *)BCS}, {i915_gem_hws_bsd, i915_hws_info, 0, (void *)VCS}, {i915_gem_hws_vebox, i915_hws_info, 0, (void *)VECS}, + {i915_gem_batch_pool, i915_gem_batch_pool_info, 0}, {i915_frequency_info, i915_frequency_info, 0}, {i915_drpc_info, i915_drpc_info, 0}, {i915_emon_status, i915_emon_status, 0}, -- 1.9.1 
___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 4/5] drm/i915: Add batch pool details to i915_gem_objects debugfs
From: Brad Volkin bradley.d.vol...@intel.com To better account for the potentially large memory consumption of the batch pool. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 45 + 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5f7cbed..53f78da 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -360,6 +360,38 @@ static int per_file_stats(int id, void *ptr, void *data) return 0; } +#define print_file_stats(m, name, stats) \ + seq_printf(m, %s: %u objects, %zu bytes (%zu active, %zu inactive, %zu global, %zu shared, %zu unbound)\n, \ + name, \ + stats.count, \ + stats.total, \ + stats.active, \ + stats.inactive, \ + stats.global, \ + stats.shared, \ + stats.unbound) + +static void print_batch_pool_stats(struct seq_file *m, + struct drm_i915_private *dev_priv) +{ + struct drm_i915_gem_object *obj; + struct file_stats stats; + + memset(stats, 0, sizeof(stats)); + + list_for_each_entry(obj, + dev_priv-mm.batch_pool.active_list, + batch_pool_list) + per_file_stats(0, obj, stats); + + list_for_each_entry(obj, + dev_priv-mm.batch_pool.inactive_list, + batch_pool_list) + per_file_stats(0, obj, stats); + + print_file_stats(m, batch pool, stats); +} + #define count_vmas(list, member) do { \ list_for_each_entry(vma, list, member) { \ size += i915_gem_obj_ggtt_size(vma-obj); \ @@ -442,6 +474,9 @@ static int i915_gem_object_info(struct seq_file *m, void* data) dev_priv-gtt.mappable_end - dev_priv-gtt.base.start); seq_putc(m, '\n'); + print_batch_pool_stats(m, dev_priv); + + seq_putc(m, '\n'); list_for_each_entry_reverse(file, dev-filelist, lhead) { struct file_stats stats; struct task_struct *task; @@ -459,15 +494,7 @@ static int i915_gem_object_info(struct seq_file *m, void* data) */ rcu_read_lock(); task = pid_task(file-pid, PIDTYPE_PID); - seq_printf(m, %s: %u objects, %zu bytes (%zu active, 
%zu inactive, %zu global, %zu shared, %zu unbound)\n, - task ? task-comm : unknown, - stats.count, - stats.total, - stats.active, - stats.inactive, - stats.global, - stats.shared, - stats.unbound); + print_file_stats(m, task ? task-comm : unknown, stats); rcu_read_unlock(); } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 0/5] Command parser batch buffer copy
From: Brad Volkin bradley.d.vol...@intel.com This is v3 of the series I sent here: http://lists.freedesktop.org/archives/intel-gfx/2014-July/048705.html Most of the previous commentary still applies. We've fixed the secure dispatch regression though, so the series now puts the parser into enabling mode in patch 2. There are currently some regressions. I've sent i-g-t patches for a couple that are test issues. The remaining issues are: drv_hangman error-state-capture-* The test has checks that the logged 'gtt_offset' matches the expected offset of the userspace-supplied batch buffer. Similarly for the address in an MI_BATCH_BUFFER_START command found in the logged ringbuffer contents. These obviously won't match if the buffer submitted to hardware is from the batch pool instead of the one from userspace. gem_reloc_vs_gpu *-thrash-inactive gem_persistent_relocs *-thrash-inactive These fail with this type of error: Test assertion failure function do_test, file gem_reloc_vs_gpu.c:221: Failed assertion: test == 0xdeadbeef mismatch in buffer 0: 0x instead of 0xdeadbeef child 6 failed with exit status 99 Subtest forked-thrash-inactive: FAIL (3.824s) One crashed, apparently in i915_gem_object_move_to_inactive() called via i915_gem_reset(). I assume there's an issue with my active tracking or madv usage for batch pool objects. Any input would be helpful. gem_cs_tlb This test takes longer and may time out. 
Brad Volkin (5): drm/i915: Implement a framework for batch buffer pools drm/i915: Use batch pools with the command parser drm/i915: Add a batch pool debugfs file drm/i915: Add batch pool details to i915_gem_objects debugfs drm/i915: Use batch length instead of object size in command parser Documentation/DocBook/drm.tmpl | 5 + drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_cmd_parser.c | 102 +++ drivers/gpu/drm/i915/i915_debugfs.c| 86 ++-- drivers/gpu/drm/i915/i915_dma.c| 1 + drivers/gpu/drm/i915/i915_drv.h| 26 + drivers/gpu/drm/i915/i915_gem.c| 11 +++ drivers/gpu/drm/i915/i915_gem_batch_pool.c | 153 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 62 +++- 9 files changed, 416 insertions(+), 31 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 2/5] drm/i915: Use batch pools with the command parser
From: Brad Volkin bradley.d.vol...@intel.com This patch sets up all of the tracking and copying necessary to use batch pools with the command parser and dispatches the copied (shadow) batch to the hardware. After this patch, the parser is in 'enabling' mode. Note that performance takes a hit from the copy in some cases and will likely need some work. At a rough pass, the memcpy appears to be the bottleneck. Without having done a deeper analysis, two ideas that come to mind are: 1) Copy sections of the batch at a time, as they are reached by parsing. Might improve cache locality. 2) Copy only up to the userspace-supplied batch length and memset the rest of the buffer. Reduces the number of reads. v2: - Remove setting the capacity of the pool - One global pool instead of per-ring pools - Replace batch_obj with shadow_batch_obj and hook into eb->vmas - Memset any space in the shadow batch beyond what gets copied - Rebased on execlist prep refactoring v3: - Rebase on chained batch handling - Squash in setting the secure dispatch flag - Add a note about the interaction w/secure dispatch pinning - Check for request->batch_obj == NULL in i915_gem_free_request Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 84 -- drivers/gpu/drm/i915/i915_dma.c| 1 + drivers/gpu/drm/i915/i915_drv.h| 8 +++ drivers/gpu/drm/i915/i915_gem.c| 10 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 61 -- 5 files changed, 143 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 809bb95..c8fe403 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -838,6 +838,56 @@ finish: return (u32*)addr; } +/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */ +static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, + struct drm_i915_gem_object *src_obj) +{ + int ret = 0; + int needs_clflush = 0; + u32 *src_addr, *dest_addr = 
NULL; + + ret = i915_gem_obj_prepare_shmem_read(src_obj, needs_clflush); + if (ret) { + DRM_DEBUG_DRIVER(CMD: failed to prep read\n); + return ERR_PTR(ret); + } + + src_addr = vmap_batch(src_obj); + if (!src_addr) { + DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n); + ret = -ENOMEM; + goto unpin_src; + } + + if (needs_clflush) + drm_clflush_virt_range((char *)src_addr, src_obj-base.size); + + ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); + if (ret) { + DRM_DEBUG_DRIVER(CMD: Failed to set batch CPU domain\n); + goto unmap_src; + } + + dest_addr = vmap_batch(dest_obj); + if (!dest_addr) { + DRM_DEBUG_DRIVER(CMD: Failed to vmap shadow batch\n); + ret = -ENOMEM; + goto unmap_src; + } + + memcpy(dest_addr, src_addr, src_obj-base.size); + if (dest_obj-base.size src_obj-base.size) + memset((u8 *)dest_addr + src_obj-base.size, 0, + dest_obj-base.size - src_obj-base.size); + +unmap_src: + vunmap(src_addr); +unpin_src: + i915_gem_object_unpin_pages(src_obj); + + return ret ? ERR_PTR(ret) : dest_addr; +} + /** * i915_needs_cmd_parser() - should a given ring use software command parsing? * @ring: the ring in question @@ -954,6 +1004,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, * i915_parse_cmds() - parse a submitted batch buffer for privilege violations * @ring: the ring on which the batch is to execute * @batch_obj: the batch buffer in question + * @shadow_batch_obj: copy of the batch buffer in question * @batch_start_offset: byte offset in the batch at which execution starts * @is_master: is the submitting process the drm master? 
* @@ -965,32 +1016,28 @@ static bool check_cmd(const struct intel_engine_cs *ring, */ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, u32 batch_start_offset, bool is_master) { int ret = 0; u32 *cmd, *batch_base, *batch_end; struct drm_i915_cmd_descriptor default_desc = { 0 }; - int needs_clflush = 0; bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ - ret = i915_gem_obj_prepare_shmem_read(batch_obj, needs_clflush); - if (ret) { - DRM_DEBUG_DRIVER(CMD: failed to prep read\n); - return ret; + batch_base = copy_batch(shadow_batch_obj, batch_obj); + if (IS_ERR(batch_base)) { + DRM_DEBUG_DRIVER(CMD: Failed to copy batch\n); + return
[Intel-gfx] [PATCH v3 1/5] drm/i915: Implement a framework for batch buffer pools
From: Brad Volkin bradley.d.vol...@intel.com This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up; get to obtain a new buffer, put to return it to the pool. Note that all buffers must be returned to the pool before cleaning up the pool. Buffers are purgeable while in the pool, but not explicitly truncated in order to avoid overhead during execbuf. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- Documentation/DocBook/drm.tmpl | 5 + drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h| 17 drivers/gpu/drm/i915/i915_gem.c| 1 + drivers/gpu/drm/i915/i915_gem_batch_pool.c | 153 + 5 files changed, 177 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index f6a9d7b..133f4e6 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3958,6 +3958,11 @@ int num_ioctls;/synopsis !Idrivers/gpu/drm/i915/i915_cmd_parser.c /sect2 sect2 +titleBatchbuffer Pools/title +!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool +!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c + /sect2 + sect2 titleLogical Rings, Logical Ring Contexts and Execlists/title 
!Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and Execlists !Idrivers/gpu/drm/i915/intel_lrc.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 891e584..73cd2d7 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o # GEM code i915-y += i915_cmd_parser.o \ + i915_gem_batch_pool.o \ i915_gem_context.o \ i915_gem_render_state.o \ i915_gem_debug.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6a73803..fbf10cc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1126,6 +1126,12 @@ struct intel_l3_parity { int which_slice; }; +struct i915_gem_batch_pool { + struct drm_device *dev; + struct list_head active_list; + struct list_head inactive_list; +}; + struct i915_gem_mm { /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; @@ -1797,6 +1803,8 @@ struct drm_i915_gem_object { /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; + struct list_head batch_pool_list; + /** * This is set if the object is on the active lists (has pending * rendering and so a non-zero seqno), and is not set if it i s on @@ -2758,6 +2766,15 @@ void i915_destroy_error_state(struct drm_device *dev); void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone); const char *i915_cache_level_str(struct drm_i915_private *i915, int type); +/* i915_gem_batch_pool.c */ +void i915_gem_batch_pool_init(struct drm_device *dev, + struct i915_gem_batch_pool *pool); +void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); +struct drm_i915_gem_object* +i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size); +void i915_gem_batch_pool_put(struct i915_gem_batch_pool *pool, +struct drm_i915_gem_object *obj); + /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(void); int i915_cmd_parser_init_ring(struct intel_engine_cs 
*ring); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7e91978..4dbd7b9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4337,6 +4337,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(obj-ring_list); INIT_LIST_HEAD(obj-obj_exec_link); INIT_LIST_HEAD(obj-vma_list); + INIT_LIST_HEAD(obj-batch_pool_list); obj-ops = ops; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c new file mode 100644 index 000..6d526fa --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@
[Intel-gfx] [PATCH 2/2] tests/gem_madvise: set execbuf.batch_len before doing an execbuf
From: Brad Volkin bradley.d.vol...@intel.com The command parser's batch_len optimization causes the parser to reject this batch as not having an MI_BATCH_BUFFER_END because the length was not set correctly. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_madvise.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/gem_madvise.c b/tests/gem_madvise.c index 04a82aa..f95fbda 100644 --- a/tests/gem_madvise.c +++ b/tests/gem_madvise.c @@ -131,6 +131,7 @@ dontneed_before_exec(void) execbuf.buffers_ptr = (uintptr_t)exec; execbuf.buffer_count = 1; + execbuf.batch_len = sizeof(buf); gem_execbuf(fd, execbuf); gem_close(fd, exec.handle); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] tests/gem_exec_parse: fix batch_len setting for cmd-crossing-page
From: Brad Volkin bradley.d.vol...@intel.com The size of the batch buffer passed to the kernel is significantly larger than the size of the batch buffer passed to the function. A proposed optimization as part of the batch copy kernel series is to use batch_len for the copy and parse operations, which leads to a false "batch without MI_BATCH_BUFFER_END" failure for this test. To fix this, modify the test to set batch_start_offset and batch_len such that they define the range of actual commands in the batch, including a few of the surrounding nops for alignment purposes. v2: update batch_start_offset as well Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 16 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index 1dc9103..e48b83a 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -144,16 +144,18 @@ static void exec_split_batch(int fd, uint32_t *cmds, struct drm_i915_gem_exec_object2 objs[1]; uint32_t cmd_bo; uint32_t noop[1024] = { 0 }; + const int alloc_size = 4096 * 2; + const int actual_start_offset = 4096-sizeof(uint32_t); // Allocate and fill a 2-page batch with noops - cmd_bo = gem_create(fd, 4096 * 2); + cmd_bo = gem_create(fd, alloc_size); gem_write(fd, cmd_bo, 0, noop, sizeof(noop)); gem_write(fd, cmd_bo, 4096, noop, sizeof(noop)); // Write the provided commands such that the first dword // of the command buffer is the last dword of the first // page (i.e. the command is split across the two pages). 
- gem_write(fd, cmd_bo, 4096-sizeof(uint32_t), cmds, size); + gem_write(fd, cmd_bo, actual_start_offset, cmds, size); objs[0].handle = cmd_bo; objs[0].relocation_count = 0; @@ -166,8 +168,14 @@ static void exec_split_batch(int fd, uint32_t *cmds, execbuf.buffers_ptr = (uintptr_t)objs; execbuf.buffer_count = 1; - execbuf.batch_start_offset = 0; - execbuf.batch_len = size; + // NB: We want batch_start_offset and batch_len to point to the block + // of the actual commands (i.e. at the last dword of the first page), + // but have to adjust both the start offset and length to meet the + // kernel driver's requirements on the alignment of those fields. + execbuf.batch_start_offset = actual_start_offset ~0x7; + execbuf.batch_len = + ALIGN(size + actual_start_offset - execbuf.batch_start_offset, + 0x8); execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; execbuf.DR1 = 0; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Abort command parsing for chained batches
From: Brad Volkin bradley.d.vol...@intel.com libva uses chained batch buffers in a way that the command parser can't generally handle. Fortunately, libva doesn't need to write registers from batch buffers in the way that mesa does, so this patch causes the driver to fall back to non-secure dispatch if the parser detects a chained batch buffer. Testcase: igt/gem_exec_parse/chained-batch Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 18 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 24 +--- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 86b3ae0..ef38915 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -138,6 +138,11 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { .mask = MI_GLOBAL_GTT, .expected = 0, }}, ), + /* +* MI_BATCH_BUFFER_START requires some special handling. It's not +* really a 'skip' action but it doesn't seem like it's worth adding +* a new action. See i915_parse_cmds(). +*/ CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), }; @@ -955,7 +960,8 @@ static bool check_cmd(const struct intel_engine_cs *ring, * Parses the specified batch buffer looking for privilege violations as * described in the overview. * - * Return: non-zero if the parser finds violations or otherwise fails + * Return: non-zero if the parser finds violations or otherwise fails; -EACCES + * if the batch appears legal but should use hardware parsing */ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, @@ -1002,6 +1008,16 @@ int i915_parse_cmds(struct intel_engine_cs *ring, break; } + /* +* If the batch buffer contains a chained batch, return an +* error that tells the caller to abort and dispatch the +* workload as a non-secure batch. 
+*/ + if (desc-cmd.value == MI_BATCH_BUFFER_START) { + ret = -EACCES; + break; + } + if (desc-flags CMD_DESC_FIXED) length = desc-length.fixed; else diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1a0611b..1ed5702 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1368,17 +1368,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, batch_obj, args-batch_start_offset, file-is_master); - if (ret) - goto err; - - /* -* XXX: Actually do this when enabling batch copy... -* -* Set the DISPATCH_SECURE bit to remove the NON_SECURE bit -* from MI_BATCH_BUFFER_START commands issued in the -* dispatch_execbuffer implementations. We specifically don't -* want that set when the command parser is enabled. -*/ + if (ret) { + if (ret != -EACCES) + goto err; + } else { + /* +* XXX: Actually do this when enabling batch copy... +* +* Set the DISPATCH_SECURE bit to remove the NON_SECURE bit +* from MI_BATCH_BUFFER_START commands issued in the +* dispatch_execbuffer implementations. We specifically don't +* want that set when the command parser is enabled. +*/ + } } /* snb/ivb/vlv conflate the batch in ppgtt bit with the non-secure -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] tests/gem_exec_parse: test for chained batch buffers
From: Brad Volkin bradley.d.vol...@intel.com libva makes extensive use of chained batch buffers. The batch buffer copy portion of the command parser has the potential to break chained batches, so add a simple test to make sure that doesn't happen. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- lib/intel_reg.h| 1 + tests/gem_exec_parse.c | 105 + 2 files changed, 106 insertions(+) diff --git a/lib/intel_reg.h b/lib/intel_reg.h index f0fc5fd..fcc9d7c 100644 --- a/lib/intel_reg.h +++ b/lib/intel_reg.h @@ -2571,6 +2571,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define MI_BATCH_BUFFER_END(0xA 23) #define MI_BATCH_NON_SECURE(1) #define MI_BATCH_NON_SECURE_I965 (1 8) +#define MI_BATCH_NON_SECURE_HSW(113) /* Additional bit for RCS */ #define MAX_DISPLAY_PIPES 2 diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index 568bd4a..3ff6a66 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -183,6 +183,96 @@ static void exec_split_batch(int fd, uint32_t *cmds, gem_close(fd, cmd_bo); } +static void exec_batch_chained(int fd, uint32_t cmd_bo, uint32_t *cmds, + int size, int patch_offset, + uint64_t expected_value) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 objs[3]; + struct drm_i915_gem_relocation_entry reloc; + struct drm_i915_gem_relocation_entry first_level_reloc; + + uint32_t target_bo = gem_create(fd, 4096); + uint32_t first_level_bo = gem_create(fd, 4096); + uint64_t actual_value = 0; + + static uint32_t first_level_cmds[] = { + MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965, + 0, + MI_BATCH_BUFFER_END, + 0, + }; + + if (IS_HASWELL(intel_get_drm_devid(fd))) + first_level_cmds[0] |= MI_BATCH_NON_SECURE_HSW; + + gem_write(fd, first_level_bo, 0, + first_level_cmds, sizeof(first_level_cmds)); + gem_write(fd, cmd_bo, 0, cmds, size); + + reloc.offset = patch_offset; + reloc.delta = 0; + reloc.target_handle = target_bo; + reloc.read_domains = I915_GEM_DOMAIN_RENDER; + reloc.write_domain = 
I915_GEM_DOMAIN_RENDER; + reloc.presumed_offset = 0; + + first_level_reloc.offset = 4; + first_level_reloc.delta = 0; + first_level_reloc.target_handle = cmd_bo; + first_level_reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION; + first_level_reloc.write_domain = 0; + first_level_reloc.presumed_offset = 0; + + objs[0].handle = target_bo; + objs[0].relocation_count = 0; + objs[0].relocs_ptr = 0; + objs[0].alignment = 0; + objs[0].offset = 0; + objs[0].flags = 0; + objs[0].rsvd1 = 0; + objs[0].rsvd2 = 0; + + objs[1].handle = cmd_bo; + objs[1].relocation_count = 1; + objs[1].relocs_ptr = (uintptr_t)reloc; + objs[1].alignment = 0; + objs[1].offset = 0; + objs[1].flags = 0; + objs[1].rsvd1 = 0; + objs[1].rsvd2 = 0; + + objs[2].handle = first_level_bo; + objs[2].relocation_count = 1; + objs[2].relocs_ptr = (uintptr_t)first_level_reloc; + objs[2].alignment = 0; + objs[2].offset = 0; + objs[2].flags = 0; + objs[2].rsvd1 = 0; + objs[2].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t)objs; + execbuf.buffer_count = 3; + execbuf.batch_start_offset = 0; + execbuf.batch_len = sizeof(first_level_cmds); + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.flags = I915_EXEC_RENDER; + i915_execbuffer2_set_context_id(execbuf, 0); + execbuf.rsvd2 = 0; + + gem_execbuf(fd, execbuf); + gem_sync(fd, cmd_bo); + + gem_read(fd,target_bo, 0, actual_value, sizeof(actual_value)); + igt_assert_eq(expected_value, actual_value); + + gem_close(fd, first_level_bo); + gem_close(fd, target_bo); +} + uint32_t handle; int fd; @@ -366,6 +456,21 @@ igt_main -EINVAL); } + igt_subtest(chained-batch) { + uint32_t pc[] = { + GFX_OP_PIPE_CONTROL, + PIPE_CONTROL_QW_WRITE, + 0, // To be patched + 0x1200, + 0, + MI_BATCH_BUFFER_END, + }; + exec_batch_chained(fd, handle, + pc, sizeof(pc), + 8, // patch offset, + 0x1200); + } + igt_fixture { gem_close(fd, handle); -- 1.9.1 ___ Intel-gfx mailing
[Intel-gfx] [PATCH 1/2] tests/gem_exec_parse: fix batch_len setting for cmd-crossing-page
From: Brad Volkin bradley.d.vol...@intel.com The size of the batch buffer passed to the kernel is significantly larger than the size of the batch buffer passed to the function. A proposed optimization as part of the batch copy kernel series is to use batch_len for the copy and parse operations, which leads to a false "batch without MI_BATCH_BUFFER_END" failure for this test. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index 05f271c..568bd4a 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -144,9 +144,10 @@ static void exec_split_batch(int fd, uint32_t *cmds, struct drm_i915_gem_exec_object2 objs[1]; uint32_t cmd_bo; uint32_t noop[1024] = { 0 }; + const int alloc_size = 4096 * 2; // Allocate and fill a 2-page batch with noops - cmd_bo = gem_create(fd, 4096 * 2); + cmd_bo = gem_create(fd, alloc_size); gem_write(fd, cmd_bo, 0, noop, sizeof(noop)); gem_write(fd, cmd_bo, 4096, noop, sizeof(noop)); @@ -167,7 +168,7 @@ static void exec_split_batch(int fd, uint32_t *cmds, execbuf.buffers_ptr = (uintptr_t)objs; execbuf.buffer_count = 1; execbuf.batch_start_offset = 0; - execbuf.batch_len = size; + execbuf.batch_len = alloc_size; execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; execbuf.DR1 = 0; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Don't leak command parser tables on suspend/resume
From: Brad Volkin bradley.d.vol...@intel.com Ring init and cleanup are not balanced because we re-init the rings on resume without having cleaned them up on suspend. This leads to the driver leaking the parser's hash tables with a kmemleak signature such as this: unreferenced object 0x880405960980 (size 32): comm systemd-udevd, pid 516, jiffies 4294896961 (age 10202.044s) hex dump (first 32 bytes): d0 85 46 c0 ff ff ff ff 00 00 00 00 00 00 00 00 ..F. 98 60 28 04 04 88 ff ff 00 00 00 00 00 00 00 00 .`(. backtrace: [81816f9e] kmemleak_alloc+0x4e/0xb0 [811fa678] kmem_cache_alloc_trace+0x168/0x2f0 [c03e20a5] i915_cmd_parser_init_ring+0x2a5/0x3e0 [i915] [c04088a2] intel_init_ring_buffer+0x202/0x470 [i915] [c040c998] intel_init_vebox_ring_buffer+0x1e8/0x2b0 [i915] [c03eff59] i915_gem_init_hw+0x2f9/0x3a0 [i915] [c03f0057] i915_gem_init+0x57/0x1d0 [i915] [c045e26a] i915_driver_load+0xc0a/0x10e0 [i915] [c02e0d5d] drm_dev_register+0xad/0x100 [drm] [c02e3b9f] drm_get_pci_dev+0x8f/0x200 [drm] [c03c934b] i915_pci_probe+0x3b/0x60 [i915] [81436725] local_pci_probe+0x45/0xa0 [81437a69] pci_device_probe+0xd9/0x130 [81524f4d] driver_probe_device+0x12d/0x3e0 [815252d3] __driver_attach+0x93/0xa0 [81522e1b] bus_for_each_dev+0x6b/0xb0 This patch extends the current convention of checking whether a resource is already allocated before allocating it during ring init. Longer term it might make sense to only init the rings once. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83794 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- According to the report in bugzilla, this happens in linux-next-20140919 as well. I'm not sure what the path is for getting the fix there in addition to nightly. 
drivers/gpu/drm/i915/i915_cmd_parser.c | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 4c35e2a..86b3ae0 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -709,11 +709,13 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring) BUG_ON(!validate_cmds_sorted(ring, cmd_tables, cmd_table_count)); BUG_ON(!validate_regs_sorted(ring)); - ret = init_hash_table(ring, cmd_tables, cmd_table_count); - if (ret) { - DRM_ERROR(CMD: cmd_parser_init failed!\n); - fini_hash_table(ring); - return ret; + if (hash_empty(ring-cmd_hash)) { + ret = init_hash_table(ring, cmd_tables, cmd_table_count); + if (ret) { + DRM_ERROR(CMD: cmd_parser_init failed!\n); + fini_hash_table(ring); + return ret; + } } ring-needs_cmd_parser = true; -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: Log a message when rejecting LRM to OACONTROL
From: Brad Volkin bradley.d.vol...@intel.com The other paths in the command parser that reject a batch all log a message indicating the reason. We simply missed this one. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index fb24dae..e1e7d37 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -886,8 +886,10 @@ static bool check_cmd(const struct intel_engine_cs *ring, * OACONTROL writes to only MI_LOAD_REGISTER_IMM commands. */ if (reg_addr == OACONTROL) { - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) + if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { + DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n"); return false; + } if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1)) *oacontrol_set = (cmd[2] != 0); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915: Re-enable the command parser when using PPGTT
From: Brad Volkin bradley.d.vol...@intel.com In commit commit 896ab1a5d54269b463a24194c2e4a369103b46d8 Author: Daniel Vetter daniel.vet...@ffwll.ch Date: Wed Aug 6 15:04:51 2014 +0200 drm/i915: Fix up checks for aliasing ppgtt it looks like we accidentally inverted the check that the command parser should only run when the driver enables some form of PPGTT. Testcase: igt/gem_exec_parse Cc: Daniel Vetter daniel.vet...@ffwll.ch Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- At this point all platforms that use the command parser should have at least aliasing PPGTT enabled I believe, so if you confirm then feel free to delete the comment about VLV and make this check stricter - maybe WARN_ON - when applying the patch. drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index c45856b..fb24dae 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -850,7 +850,7 @@ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) * disabled. That will cause all of the parser's PPGTT checks to * fail. For now, disable parsing when PPGTT is off. */ - if (USES_PPGTT(ring->dev)) + if (!USES_PPGTT(ring->dev)) return false; return (i915.enable_cmd_parser == 1); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 1/5] drm/i915: Implement a framework for batch buffer pools
From: Brad Volkin bradley.d.vol...@intel.com This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up; get to obtain a new buffer, put to return it to the pool. Note that all buffers must be returned to the pool before freeing it. Buffers are purgeable while in the pool, but not explicitly truncated in order to avoid overhead during execbuf. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- Documentation/DocBook/drm.tmpl | 5 + drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h| 17 drivers/gpu/drm/i915/i915_gem.c| 1 + drivers/gpu/drm/i915/i915_gem_batch_pool.c | 153 + 5 files changed, 177 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 4890d94..2749555 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3945,6 +3945,11 @@ int num_ioctls;/synopsis !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser !Idrivers/gpu/drm/i915/i915_cmd_parser.c /sect2 + sect2 +titleBatchbuffer Pools/title +!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool +!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c + /sect2 /sect1 /chapter /part diff --git a/drivers/gpu/drm/i915/Makefile 
b/drivers/gpu/drm/i915/Makefile index cad1683..b92fbe6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -17,6 +17,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o # GEM code i915-y += i915_cmd_parser.o \ + i915_gem_batch_pool.o \ i915_gem_context.o \ i915_gem_render_state.o \ i915_gem_debug.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 90216bb..a478a96 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1062,6 +1062,12 @@ struct intel_l3_parity { int which_slice; }; +struct i915_gem_batch_pool { + struct drm_device *dev; + struct list_head active_list; + struct list_head inactive_list; +}; + struct i915_gem_mm { /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; @@ -1690,6 +1696,8 @@ struct drm_i915_gem_object { /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; + struct list_head batch_pool_list; + /** * This is set if the object is on the active lists (has pending * rendering and so a non-zero seqno), and is not set if it i s on @@ -2594,6 +2602,15 @@ void i915_destroy_error_state(struct drm_device *dev); void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone); const char *i915_cache_level_str(int type); +/* i915_gem_batch_pool.c */ +void i915_gem_batch_pool_init(struct drm_device *dev, + struct i915_gem_batch_pool *pool); +void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); +struct drm_i915_gem_object* +i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size); +void i915_gem_batch_pool_put(struct i915_gem_batch_pool *pool, +struct drm_i915_gem_object *obj); + /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(void); int i915_cmd_parser_init_ring(struct intel_engine_cs *ring); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5d4d73..89a4ec0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ 
-4332,6 +4332,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(obj-ring_list); INIT_LIST_HEAD(obj-obj_exec_link); INIT_LIST_HEAD(obj-vma_list); + INIT_LIST_HEAD(obj-batch_pool_list); obj-ops = ops; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c new file mode 100644 index 000..6d526fa --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -0,0 +1,153 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated
[Intel-gfx] [PATCH 6/5] drm/i915: Add batch pool details to i915_gem_objects debugfs
From: Brad Volkin bradley.d.vol...@intel.com To better account for the potentially large memory consumption of the batch pool. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 45 + 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 696eb98..d4ec4ec 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -360,6 +360,38 @@ static int per_file_stats(int id, void *ptr, void *data) return 0; } +#define print_file_stats(m, name, stats) \ + seq_printf(m, %s: %u objects, %zu bytes (%zu active, %zu inactive, %zu global, %zu shared, %zu unbound)\n, \ + name, \ + stats.count, \ + stats.total, \ + stats.active, \ + stats.inactive, \ + stats.global, \ + stats.shared, \ + stats.unbound) + +static void print_batch_pool_stats(struct seq_file *m, + struct drm_i915_private *dev_priv) +{ + struct drm_i915_gem_object *obj; + struct file_stats stats; + + memset(stats, 0, sizeof(stats)); + + list_for_each_entry(obj, + dev_priv-mm.batch_pool.active_list, + batch_pool_list) + per_file_stats(0, obj, stats); + + list_for_each_entry(obj, + dev_priv-mm.batch_pool.inactive_list, + batch_pool_list) + per_file_stats(0, obj, stats); + + print_file_stats(m, batch pool, stats); +} + #define count_vmas(list, member) do { \ list_for_each_entry(vma, list, member) { \ size += i915_gem_obj_ggtt_size(vma-obj); \ @@ -442,6 +474,9 @@ static int i915_gem_object_info(struct seq_file *m, void* data) dev_priv-gtt.mappable_end - dev_priv-gtt.base.start); seq_putc(m, '\n'); + print_batch_pool_stats(m, dev_priv); + + seq_putc(m, '\n'); list_for_each_entry_reverse(file, dev-filelist, lhead) { struct file_stats stats; struct task_struct *task; @@ -459,15 +494,7 @@ static int i915_gem_object_info(struct seq_file *m, void* data) */ rcu_read_lock(); task = pid_task(file-pid, PIDTYPE_PID); - seq_printf(m, %s: %u objects, %zu bytes (%zu active, 
%zu inactive, %zu global, %zu shared, %zu unbound)\n, - task ? task-comm : unknown, - stats.count, - stats.total, - stats.active, - stats.inactive, - stats.global, - stats.shared, - stats.unbound); + print_file_stats(m, task ? task-comm : unknown, stats); rcu_read_unlock(); } -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 4/5] drm/i915: Dispatch the shadow batch buffer
From: Brad Volkin bradley.d.vol...@intel.com This is useful for testing the batch pool code with aliasing PPGTT. It doesn't work with full PPGTT though; the GPU hangs and the whole UI is corrupted. We need fixes for the secure dispatch path to enable this for real. v2: rebase on shadow_batch_obj replacing batch_obj Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4c4bd66..908cf48 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1401,13 +1401,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, batch_obj = shadow_batch_obj; /* -* XXX: Actually do this when enabling batch copy... -* * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit * from MI_BATCH_BUFFER_START commands issued in the * dispatch_execbuffer implementations. We specifically don't * want that set when the command parser is enabled. */ + flags |= I915_DISPATCH_SECURE; } /* snb/ivb/vlv conflate the batch in ppgtt bit with the non-secure -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 0/5] Command parser batch buffer copy
From: Brad Volkin bradley.d.vol...@intel.com This is v2 of the series I sent here: http://lists.freedesktop.org/archives/intel-gfx/2014-June/047609.html I believe that I've addressed all of the feedback except * I didn't move the allocation of the shadow batch buffer into parse_cmds(). It didn't seem like it added much value and would maybe complicate the error handling in do_execbuffer(). * I kept the part about attaching the shadow batch to the request, though in perhaps a less invasive way. My concern here is with the scheduler possibly reordering requests, I don't know if we'd still be able to implement the busy tracking in the pool as suggested. The commit message for patch 4 still applies: we aren't ready for that change until the secure dispatch regression is resolved, but it's needed for testing. I've added patch 5 to use batch_len instead of object size, as an optimization. My testing didn't show any perf difference, but I don't have any libva benchmarks to run, and that's where it sounded like the issue would be. I just tacked the patch onto the end of the series rather than squashing it in so we can easily take it or leave it as desired. 
Brad Volkin (5): drm/i915: Implement a framework for batch buffer pools drm/i915: Use batch pools with the command parser drm/i915: Add a batch pool debugfs file drm/i915: Dispatch the shadow batch buffer drm/i915: Use batch length instead of object size in command parser Documentation/DocBook/drm.tmpl | 5 ++ drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_cmd_parser.c | 88 +++ drivers/gpu/drm/i915/i915_debugfs.c| 41 + drivers/gpu/drm/i915/i915_dma.c| 1 + drivers/gpu/drm/i915/i915_drv.h| 26 ++ drivers/gpu/drm/i915/i915_gem.c| 10 +++ drivers/gpu/drm/i915/i915_gem_batch_pool.c | 133 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 39 - 9 files changed, 325 insertions(+), 19 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 1/5] drm/i915: Implement a framework for batch buffer pools
From: Brad Volkin bradley.d.vol...@intel.com This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up; get to obtain a new buffer, put to return it to the pool. Note that all buffers must be returned to the pool before freeing it. Buffers are purgeable while in the pool, but not explicitly truncated in order to avoid overhead during execbuf. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- Documentation/DocBook/drm.tmpl | 5 ++ drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h| 17 drivers/gpu/drm/i915/i915_gem.c| 1 + drivers/gpu/drm/i915/i915_gem_batch_pool.c | 133 + 5 files changed, 157 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 4890d94..2749555 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3945,6 +3945,11 @@ int num_ioctls;/synopsis !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser !Idrivers/gpu/drm/i915/i915_cmd_parser.c /sect2 + sect2 +titleBatchbuffer Pools/title +!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool +!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c + /sect2 /sect1 /chapter /part diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index cad1683..b92fbe6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -17,6 
+17,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o # GEM code i915-y += i915_cmd_parser.o \ + i915_gem_batch_pool.o \ i915_gem_context.o \ i915_gem_render_state.o \ i915_gem_debug.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 90216bb..a478a96 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1062,6 +1062,12 @@ struct intel_l3_parity { int which_slice; }; +struct i915_gem_batch_pool { + struct drm_device *dev; + struct list_head active_list; + struct list_head inactive_list; +}; + struct i915_gem_mm { /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; @@ -1690,6 +1696,8 @@ struct drm_i915_gem_object { /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; + struct list_head batch_pool_list; + /** * This is set if the object is on the active lists (has pending * rendering and so a non-zero seqno), and is not set if it i s on @@ -2594,6 +2602,15 @@ void i915_destroy_error_state(struct drm_device *dev); void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone); const char *i915_cache_level_str(int type); +/* i915_gem_batch_pool.c */ +void i915_gem_batch_pool_init(struct drm_device *dev, + struct i915_gem_batch_pool *pool); +void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); +struct drm_i915_gem_object* +i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size); +void i915_gem_batch_pool_put(struct i915_gem_batch_pool *pool, +struct drm_i915_gem_object *obj); + /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(void); int i915_cmd_parser_init_ring(struct intel_engine_cs *ring); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5d4d73..89a4ec0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4332,6 +4332,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(obj-ring_list); INIT_LIST_HEAD(obj-obj_exec_link); 
INIT_LIST_HEAD(obj-vma_list); + INIT_LIST_HEAD(obj-batch_pool_list); obj-ops = ops; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c new file mode 100644 index 000..542477f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -0,0 +1,133 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy,
[Intel-gfx] [PATCH v2 2/5] drm/i915: Use batch pools with the command parser
From: Brad Volkin bradley.d.vol...@intel.com This patch sets up all of the tracking and copying necessary to use batch pools with the command parser, but does not actually dispatch the copied (shadow) batch to the hardware yet. We still aren't quite ready to set the secure bit during dispatch. Note that performance takes a hit from the copy in some cases and will likely need some work. At a rough pass, the memcpy appears to be the bottleneck. Without having done a deeper analysis, two ideas that come to mind are: 1) Copy sections of the batch at a time, as they are reached by parsing. Might improve cache locality. 2) Copy only up to the userspace-supplied batch length and memset the rest of the buffer. Reduces the number of reads. v2: - Remove setting the capacity of the pool - One global pool instead of per-ring pools - Replace batch_obj with shadow_batch_obj and hook into eb-vmas - Memset any space in the shadow batch beyond what gets copied - Rebased on execlist prep refactoring Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 84 -- drivers/gpu/drm/i915/i915_dma.c| 1 + drivers/gpu/drm/i915/i915_drv.h| 8 +++ drivers/gpu/drm/i915/i915_gem.c| 9 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 35 + 5 files changed, 121 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index dea99d9..18788df 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -831,6 +831,56 @@ finish: return (u32*)addr; } +/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */ +static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, + struct drm_i915_gem_object *src_obj) +{ + int ret = 0; + int needs_clflush = 0; + u32 *src_addr, *dest_addr = NULL; + + ret = i915_gem_obj_prepare_shmem_read(src_obj, needs_clflush); + if (ret) { + DRM_DEBUG_DRIVER(CMD: failed to prep read\n); + return ERR_PTR(ret); + } + + src_addr 
= vmap_batch(src_obj); + if (!src_addr) { + DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n); + ret = -ENOMEM; + goto unpin_src; + } + + if (needs_clflush) + drm_clflush_virt_range((char *)src_addr, src_obj-base.size); + + ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); + if (ret) { + DRM_DEBUG_DRIVER(CMD: Failed to set batch CPU domain\n); + goto unmap_src; + } + + dest_addr = vmap_batch(dest_obj); + if (!dest_addr) { + DRM_DEBUG_DRIVER(CMD: Failed to vmap shadow batch\n); + ret = -ENOMEM; + goto unmap_src; + } + + memcpy(dest_addr, src_addr, src_obj-base.size); + if (dest_obj-base.size src_obj-base.size) + memset((u8 *)dest_addr + src_obj-base.size, 0, + dest_obj-base.size - src_obj-base.size); + +unmap_src: + vunmap(src_addr); +unpin_src: + i915_gem_object_unpin_pages(src_obj); + + return ret ? ERR_PTR(ret) : dest_addr; +} + /** * i915_needs_cmd_parser() - should a given ring use software command parsing? * @ring: the ring in question @@ -952,6 +1002,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, * i915_parse_cmds() - parse a submitted batch buffer for privilege violations * @ring: the ring on which the batch is to execute * @batch_obj: the batch buffer in question + * @shadow_batch_obj: copy of the batch buffer in question * @batch_start_offset: byte offset in the batch at which execution starts * @is_master: is the submitting process the drm master? * @@ -962,32 +1013,28 @@ static bool check_cmd(const struct intel_engine_cs *ring, */ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, u32 batch_start_offset, bool is_master) { int ret = 0; u32 *cmd, *batch_base, *batch_end; struct drm_i915_cmd_descriptor default_desc = { 0 }; - int needs_clflush = 0; bool oacontrol_set = false; /* OACONTROL tracking. 
See check_cmd() */ - ret = i915_gem_obj_prepare_shmem_read(batch_obj, needs_clflush); - if (ret) { - DRM_DEBUG_DRIVER(CMD: failed to prep read\n); - return ret; + batch_base = copy_batch(shadow_batch_obj, batch_obj); + if (IS_ERR(batch_base)) { + DRM_DEBUG_DRIVER(CMD: Failed to copy batch\n); + return PTR_ERR(batch_base); } - batch_base = vmap_batch(batch_obj); - if (!batch_base) { - DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n); -
[Intel-gfx] [PATCH v2 5/5] drm/i915: Use batch length instead of object size in command parser
From: Brad Volkin bradley.d.vol...@intel.com Previously we couldn't trust the user-supplied batch length because it came directly from userspace (i.e. untrusted code). It would have affected what commands software parsed without regard to what hardware would actually execute, leaving a potential hole. With the parser now copying the user supplied batch buffer and writing MI_NOP commands to any space after the copied region, we can safely use the batch length input. This should be a performance win as the actual batch length is frequently much smaller than the allocated object size. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 20 +++- drivers/gpu/drm/i915/i915_drv.h| 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 1 + 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 18788df..2470d3b 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -833,7 +833,8 @@ finish: /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, - struct drm_i915_gem_object *src_obj) + struct drm_i915_gem_object *src_obj, + u32 batch_len) { int ret = 0; int needs_clflush = 0; @@ -853,7 +854,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, } if (needs_clflush) - drm_clflush_virt_range((char *)src_addr, src_obj-base.size); + drm_clflush_virt_range((char *)src_addr, batch_len); ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); if (ret) { @@ -868,10 +869,10 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, goto unmap_src; } - memcpy(dest_addr, src_addr, src_obj-base.size); - if (dest_obj-base.size src_obj-base.size) - memset((u8 *)dest_addr + src_obj-base.size, 0, - dest_obj-base.size - src_obj-base.size); + memcpy(dest_addr, src_addr, batch_len); + if (dest_obj-base.size batch_len) + 
memset((u8 *)dest_addr + batch_len, 0, + dest_obj-base.size - batch_len); unmap_src: vunmap(src_addr); @@ -1015,6 +1016,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, struct drm_i915_gem_object *shadow_batch_obj, u32 batch_start_offset, + u32 batch_len, bool is_master) { int ret = 0; @@ -1022,7 +1024,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_cmd_descriptor default_desc = { 0 }; bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ - batch_base = copy_batch(shadow_batch_obj, batch_obj); + batch_base = copy_batch(shadow_batch_obj, batch_obj, batch_len); if (IS_ERR(batch_base)) { DRM_DEBUG_DRIVER(CMD: Failed to copy batch\n); return PTR_ERR(batch_base); @@ -1031,11 +1033,11 @@ int i915_parse_cmds(struct intel_engine_cs *ring, cmd = batch_base + (batch_start_offset / sizeof(*cmd)); /* -* We use the source object's size because the shadow object is as +* We use the batch length as size because the shadow object is as * large or larger and copy_batch() will write MI_NOPs to the extra * space. Parsing should be faster in some cases this way. 
*/ - batch_end = cmd + (batch_obj-base.size / sizeof(*batch_end)); + batch_end = cmd + (batch_len / sizeof(*batch_end)); while (cmd batch_end) { const struct drm_i915_cmd_descriptor *desc; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a6b903d..49bcf79 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2627,6 +2627,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, struct drm_i915_gem_object *shadow_batch_obj, u32 batch_start_offset, + u32 batch_len, bool is_master); /* i915_suspend.c */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 908cf48..69ce030 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1388,6 +1388,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, batch_obj, shadow_batch_obj, args-batch_start_offset, + args-batch_len,
[Intel-gfx] [PATCH v2 3/5] drm/i915: Add a batch pool debugfs file
From: Brad Volkin bradley.d.vol...@intel.com It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 41 + 1 file changed, 41 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b3b56c4..696eb98 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -568,6 +568,46 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) return 0; } +static int i915_gem_batch_pool_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj; + int count = 0; + int ret; + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + seq_puts(m, "active:\n"); + list_for_each_entry(obj, + &dev_priv->mm.batch_pool.active_list, + batch_pool_list) { + seq_puts(m, "   "); + describe_obj(m, obj); + seq_putc(m, '\n'); + count++; + } + + seq_puts(m, "inactive:\n"); + list_for_each_entry(obj, + &dev_priv->mm.batch_pool.inactive_list, + batch_pool_list) { + seq_puts(m, "   "); + describe_obj(m, obj); + seq_putc(m, '\n'); + count++; + } + + seq_printf(m, "total: %d\n", count); + + mutex_unlock(&dev->struct_mutex); + + return 0; +} + static int i915_gem_request_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; @@ -3950,6 +3990,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_gem_hws_blt", i915_hws_info, 0, (void *)BCS}, {"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS}, {"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS}, + {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0}, {"i915_rstdby_delays", i915_rstdby_delays, 0}, {"i915_frequency_info", i915_frequency_info, 0}, {"i915_delayfreq_table", i915_delayfreq_table, 0}, -- 
1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 1/4] drm/i915: Implement a framework for batch buffer pools
From: Brad Volkin bradley.d.vol...@intel.com This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: alloc to create an empty pool, free to clean it up; get to obtain a new buffer, put to return it to the pool. Note that all buffers must be returned to the pool before freeing it. The pool has a maximum number of buffers allowed due to some tests (e.g. gem_exec_nop) creating a very large number of buffers (e.g. ___). Buffers are purgeable while in the pool, but not explicitly truncated in order to avoid overhead during execbuf. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- r.e. pool capacity My original testing showed something like thousands of buffers in the pool after a gem_exec_nop run. But when I reran with the max check disabled just now to get an actual number for the commit message, the number was more like 130. I developed and tested the changes incrementally, and suspect that the original run was before I implemented the actual copy operation. So I'm inclined to remove or at least increase the cap in the final version. Thoughts? 
--- Documentation/DocBook/drm.tmpl | 5 + drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h| 19 drivers/gpu/drm/i915/i915_gem.c| 1 + drivers/gpu/drm/i915/i915_gem_batch_pool.c | 151 + 5 files changed, 177 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 7df3134..fcc0a1c 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3939,6 +3939,11 @@ int num_ioctls;/synopsis !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser !Idrivers/gpu/drm/i915/i915_cmd_parser.c /sect2 + sect2 +titleBatchbuffer Pools/title +!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool +!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c + /sect2 /sect1 /chapter /part diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index cad1683..b92fbe6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -17,6 +17,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o # GEM code i915-y += i915_cmd_parser.o \ + i915_gem_batch_pool.o \ i915_gem_context.o \ i915_gem_render_state.o \ i915_gem_debug.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0640071..2a88b5e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1610,6 +1610,8 @@ struct drm_i915_gem_object { /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; + struct list_head batch_pool_list; + /** * This is set if the object is on the active lists (has pending * rendering and so a non-zero seqno), and is not set if it i s on @@ -1727,6 +1729,14 @@ struct drm_i915_gem_object { }; #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) +struct i915_gem_batch_pool { + struct drm_device *dev; + struct list_head active_list; + struct list_head inactive_list; + int count; + int max_count; +}; + /** * Request queue structure. 
* @@ -2508,6 +2518,15 @@ void i915_destroy_error_state(struct drm_device *dev); void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone); const char *i915_cache_level_str(int type); +/* i915_gem_batch_pool.c */ +struct i915_gem_batch_pool *i915_gem_batch_pool_alloc(struct drm_device *dev, + int max_count); +void i915_gem_batch_pool_free(struct i915_gem_batch_pool *pool); +struct drm_i915_gem_object* +i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size); +void i915_gem_batch_pool_put(struct i915_gem_batch_pool *pool, +struct drm_i915_gem_object *obj); + /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(void); int i915_cmd_parser_init_ring(struct intel_engine_cs *ring); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d857f58..d5e3001 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4324,6 +4324,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(obj-ring_list); INIT_LIST_HEAD(obj-obj_exec_link); INIT_LIST_HEAD(obj-vma_list); + INIT_LIST_HEAD(obj-batch_pool_list);
[Intel-gfx] [RFC 2/4] drm/i915: Use batch pools with the command parser
From: Brad Volkin bradley.d.vol...@intel.com This patch sets up all of the tracking and copying necessary to use batch pools with the command parser, but does not actually dispatch the copied (shadow) batch to the hardware yet. We still aren't quite ready to set the secure bit during dispatch. Note that performance takes a hit from the copy in some cases and will likely need some work. At a rough pass, the memcpy appears to be the bottleneck. Without having done a deeper analysis, two ideas that come to mind are: 1) Copy sections of the batch at a time, as they are reached by parsing. Might improve cache locality. 2) Copy only up to the userspace-supplied batch length and memset the rest of the buffer. Reduces the number of reads. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 75 ++-- drivers/gpu/drm/i915/i915_drv.h | 7 ++- drivers/gpu/drm/i915/i915_gem.c | 8 ++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 45 +++-- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 + drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++ 7 files changed, 134 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index dea99d9..669afb0 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -831,6 +831,53 @@ finish: return (u32*)addr; } +/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */ +static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, + struct drm_i915_gem_object *src_obj) +{ + int ret = 0; + int needs_clflush = 0; + u32 *src_addr, *dest_addr = NULL; + + ret = i915_gem_obj_prepare_shmem_read(src_obj, needs_clflush); + if (ret) { + DRM_DEBUG_DRIVER(CMD: failed to prep read\n); + return ERR_PTR(ret); + } + + src_addr = vmap_batch(src_obj); + if (!src_addr) { + DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n); + ret = -ENOMEM; + goto unpin_src; + } + + 
if (needs_clflush) + drm_clflush_virt_range((char *)src_addr, src_obj-base.size); + + ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); + if (ret) { + DRM_DEBUG_DRIVER(CMD: Failed to set batch CPU domain\n); + goto unmap_src; + } + + dest_addr = vmap_batch(dest_obj); + if (!dest_addr) { + DRM_DEBUG_DRIVER(CMD: Failed to vmap shadow batch\n); + ret = -ENOMEM; + goto unmap_src; + } + + memcpy(dest_addr, src_addr, src_obj-base.size); + +unmap_src: + vunmap(src_addr); +unpin_src: + i915_gem_object_unpin_pages(src_obj); + + return ret ? ERR_PTR(ret) : dest_addr; +} + /** * i915_needs_cmd_parser() - should a given ring use software command parsing? * @ring: the ring in question @@ -952,6 +999,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, * i915_parse_cmds() - parse a submitted batch buffer for privilege violations * @ring: the ring on which the batch is to execute * @batch_obj: the batch buffer in question + * @shadow_batch_obj: copy of the batch buffer in question * @batch_start_offset: byte offset in the batch at which execution starts * @is_master: is the submitting process the drm master? * @@ -962,31 +1010,21 @@ static bool check_cmd(const struct intel_engine_cs *ring, */ int i915_parse_cmds(struct intel_engine_cs *ring, struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, u32 batch_start_offset, bool is_master) { int ret = 0; u32 *cmd, *batch_base, *batch_end; struct drm_i915_cmd_descriptor default_desc = { 0 }; - int needs_clflush = 0; bool oacontrol_set = false; /* OACONTROL tracking. 
See check_cmd() */ - ret = i915_gem_obj_prepare_shmem_read(batch_obj, needs_clflush); - if (ret) { - DRM_DEBUG_DRIVER(CMD: failed to prep read\n); - return ret; - } - - batch_base = vmap_batch(batch_obj); - if (!batch_base) { - DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n); - i915_gem_object_unpin_pages(batch_obj); - return -ENOMEM; + batch_base = copy_batch(shadow_batch_obj, batch_obj); + if (IS_ERR(batch_base)) { + DRM_DEBUG_DRIVER(CMD: Failed to copy batch\n); + return PTR_ERR(batch_base); } - if (needs_clflush) - drm_clflush_virt_range((char *)batch_base, batch_obj-base.size); - cmd = batch_base + (batch_start_offset / sizeof(*cmd)); batch_end = cmd + (batch_obj-base.size /
[Intel-gfx] [RFC 4/4] drm/i915: Dispatch the shadow batch buffer
From: Brad Volkin bradley.d.vol...@intel.com This is useful for testing the batch pool code with aliasing PPGTT. It doesn't work with full PPGTT though; the GPU hangs and the whole UI is corrupted. We need fixes for the secure dispatch path to enable this for real. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 37 -- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0b263aa..981f66b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1321,31 +1321,34 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; /* -* XXX: Actually do this when enabling batch copy... -* * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit * from MI_BATCH_BUFFER_START commands issued in the * dispatch_execbuffer implementations. We specifically don't * want that set when the command parser is enabled. */ + flags |= I915_DISPATCH_SECURE; } - /* snb/ivb/vlv conflate the batch in ppgtt bit with the non-secure -* batch bit. Hence we need to pin secure batches into the global gtt. -* hsw should have this fixed, but bdw mucks it up again. */ - if (flags I915_DISPATCH_SECURE - !batch_obj-has_global_gtt_mapping) { - /* When we have multiple VMs, we'll need to make sure that we -* allocate space first */ - struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj); - BUG_ON(!vma); - vma-bind_vma(vma, batch_obj-cache_level, GLOBAL_BIND); - } + if (!shadow_batch_obj) { + /* snb/ivb/vlv conflate the batch in ppgtt bit with the non-secure +* batch bit. Hence we need to pin secure batches into the global gtt. +* hsw should have this fixed, but bdw mucks it up again. 
*/ + if (flags I915_DISPATCH_SECURE + !batch_obj-has_global_gtt_mapping) { + /* When we have multiple VMs, we'll need to make sure that we +* allocate space first */ + struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj); + BUG_ON(!vma); + vma-bind_vma(vma, batch_obj-cache_level, GLOBAL_BIND); + } - if (flags I915_DISPATCH_SECURE) - exec_start += i915_gem_obj_ggtt_offset(batch_obj); - else - exec_start += i915_gem_obj_offset(batch_obj, vm); + if (flags I915_DISPATCH_SECURE) + exec_start += i915_gem_obj_ggtt_offset(batch_obj); + else + exec_start += i915_gem_obj_offset(batch_obj, vm); + } else { + exec_start += i915_gem_obj_ggtt_offset(shadow_batch_obj); + } ret = i915_gem_execbuffer_move_to_gpu(ring, eb-vmas); if (ret) -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Add some L3 registers to the parser whitelist
From: Brad Volkin bradley.d.vol...@intel.com Beignet needs these in order to program the L3 cache config for OpenCL workloads, particularly when using SLM. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 3 +++ drivers/gpu/drm/i915/i915_reg.h| 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 9d79543..dea99d9 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -426,6 +426,9 @@ static const u32 gen7_render_regs[] = { GEN7_SO_WRITE_OFFSET(1), GEN7_SO_WRITE_OFFSET(2), GEN7_SO_WRITE_OFFSET(3), + GEN7_L3SQCREG1, + GEN7_L3CNTLREG2, + GEN7_L3CNTLREG3, }; static const u32 gen7_blt_regs[] = { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e1fb0f2..3488567 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4670,6 +4670,8 @@ enum punit_power_well { #define GEN7_L3CNTLREG10xB01C #define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C #define GEN7_L3AGDIS (119) +#define GEN7_L3CNTLREG20xB020 +#define GEN7_L3CNTLREG30xB024 #define GEN7_L3_CHICKEN_MODE_REGISTER 0xB030 #define GEN7_WA_L3_CHICKEN_MODE 0x2000 -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Only check PPGTT bits when using PPGTT
From: Brad Volkin bradley.d.vol...@intel.com This extends use of the command parser to VLV. Note that the patch checks that the PPGTT bit is set appropriately when PPGTT is enabled but ignores it when PPGTT is disabled. It would be awkward to correctly invert the expected value to check that the bit is set appropriately in that case, and of limited value anyhow. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- I've confirmed that the shmem pread setup stuff we added does fix the caching issues I saw previously. I've done some basic testing with this on both IVB and VLV and don't see regressions. I don't have any data on the VLV perf impact though. Also, I considered splitting the patch up a bit differently but decided that a single patch seemed ok. I'm happy to split it up a bit if that's what people prefer. drivers/gpu/drm/i915/i915_cmd_parser.c | 187 + drivers/gpu/drm/i915/i915_drv.h| 8 +- 2 files changed, 104 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 9d79543..fd35900 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -110,6 +110,7 @@ #define W CMD_DESC_REGISTER #define B CMD_DESC_BITMASK #define M CMD_DESC_MASTER +#define P CMD_DESC_PPGTT /*Command Mask Fixed Len Action -- */ @@ -124,20 +125,20 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007C } ), - CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W | B, + CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W | P, .reg = { .offset = 1, .mask = 0x007C }, - .bits = {{ + .ppgtt = { .offset = 0, .mask = MI_GLOBAL_GTT, .expected = 0, - }}, ), - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B, + }, ), + CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | P, .reg = { .offset = 1, .mask = 0x007C }, - .bits = {{ + .ppgtt = { .offset = 0, .mask = 
MI_GLOBAL_GTT, .expected = 0, - }}, ), + }, ), CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), }; @@ -149,31 +150,31 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_SET_CONTEXT, SMI, !F, 0xFF, R ), CMD( MI_URB_CLEAR, SMI, !F, 0xFF, S ), - CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3F, B, - .bits = {{ + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3F, P, + .ppgtt = { .offset = 0, .mask = MI_GLOBAL_GTT, .expected = 0, - }}, ), + }, ), CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, R ), - CMD( MI_CLFLUSH, SMI, !F, 0x3FF, B, - .bits = {{ + CMD( MI_CLFLUSH, SMI, !F, 0x3FF, P, + .ppgtt = { .offset = 0, .mask = MI_GLOBAL_GTT, .expected = 0, - }}, ), - CMD( MI_REPORT_PERF_COUNT, SMI, !F, 0x3F, B, - .bits = {{ + }, ), + CMD( MI_REPORT_PERF_COUNT, SMI, !F, 0x3F, P, + .ppgtt = { .offset = 1, .mask = MI_REPORT_PERF_COUNT_GGTT, .expected = 0, - }}, ), - CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, B, - .bits = {{ + }, ), + CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, P, + .ppgtt = { .offset = 0, .mask =
[Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser
From: Brad Volkin bradley.d.vol...@intel.com For clients that submit large batch buffers the command parser has a substantial impact on performance. On my HSW ULT system performance drops as much as ~20% on some tests. Most of the time is spent in the command lookup code. Converting that from the current naive search to a hash table lookup reduces the performance drop to ~10%. The choice of value for I915_CMD_HASH_ORDER allows all commands currently used in the parser tables to hash to their own bucket (except for one collision on the render ring). The tradeoff is that it wastes memory. Because the opcodes for the commands in the tables are not particularly well distributed, reducing the order still leaves many buckets empty. The increased collisions don't seem to have a huge impact on the performance gain, but for now anyhow, the parser trades memory for performance. NB: Ville noticed that the error paths through the ring init code will leak memory. I've not addressed that here. We can do a follow up pass to handle all of the leaks. 
v2: improved comment describing selection of hash key mask (Damien) replace a BUG_ON() with an error return (Tvrtko, Ville) commit message improvements Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 158 +--- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++- 4 files changed, 140 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 69d34e4..d3a5b74 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_ring_buffer *ring) +static bool validate_cmds_sorted(struct intel_ring_buffer *ring, +const struct drm_i915_cmd_table *cmd_tables, +int cmd_table_count) { int i; bool ret = true; - if (!ring-cmd_tables || ring-cmd_table_count == 0) + if (!cmd_tables || cmd_table_count == 0) return true; - for (i = 0; i ring-cmd_table_count; i++) { - const struct drm_i915_cmd_table *table = ring-cmd_tables[i]; + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; u32 previous = 0; int j; @@ -557,6 +559,68 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) ring-master_reg_count); } +struct cmd_node { + const struct drm_i915_cmd_descriptor *desc; + struct hlist_node node; +}; + +/* + * Different command ranges have different numbers of bits for the opcode. For + * example, MI commands use bits 31:23 while 3D commands use bits 31:16. The + * problem is that, for example, MI commands use bits 22:16 for other fields + * such as GGTT vs PPGTT bits. If we include those bits in the mask then when + * we mask a command from a batch it could hash to the wrong bucket due to + * non-opcode bits being set. 
But if we don't include those bits, some 3D + * commands may hash to the same bucket due to not including opcode bits that + * make the command unique. For now, we will risk hashing to the same bucket. + * + * If we attempt to generate a perfect hash, we should be able to look at bits + * 31:29 of a command from a batch buffer and use the full mask for that + * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this. + */ +#define CMD_HASH_MASK STD_MI_OPCODE_MASK + +static int init_hash_table(struct intel_ring_buffer *ring, + const struct drm_i915_cmd_table *cmd_tables, + int cmd_table_count) +{ + int i, j; + + hash_init(ring-cmd_hash); + + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; + + for (j = 0; j table-count; j++) { + const struct drm_i915_cmd_descriptor *desc = + table-table[j]; + struct cmd_node *desc_node = + kmalloc(sizeof(*desc_node), GFP_KERNEL); + + if (!desc_node) + return -ENOMEM; + + desc_node-desc = desc; + hash_add(ring-cmd_hash, desc_node-node, +desc-cmd.value CMD_HASH_MASK); + } + } + + return 0; +} + +static void fini_hash_table(struct intel_ring_buffer *ring) +{ + struct hlist_node *tmp; + struct cmd_node *desc_node; + int i; + + hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) { +
[Intel-gfx] [PATCH] tests/gen7_forcewake_mt: Don't set the GGTT bit in SRM command
From: Brad Volkin bradley.d.vol...@intel.com The command parser in newer kernels will reject it and setting this bit is not required for the actual test case. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76670 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- This is a resend of http://lists.freedesktop.org/archives/intel-gfx/2014-April/043223.html There was initially some discussion about the fact that the test was written to reflect the implementation of a workaround in the ddx and whether this patch led to a deviation between the two. There was no real closure on that discussion; however, I don't believe the MI_STORE_REGISTER_MEM aspect of the test is relevant to the ddx code, so I'd like to move forward with this or get clear direction on the preferred solution. tests/gen7_forcewake_mt.c | 55 +-- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/tests/gen7_forcewake_mt.c b/tests/gen7_forcewake_mt.c index fdc34ce..3afd80a 100644 --- a/tests/gen7_forcewake_mt.c +++ b/tests/gen7_forcewake_mt.c @@ -121,7 +121,7 @@ static void *thread(void *arg) } #define MI_LOAD_REGISTER_IMM(0x2223) -#define MI_STORE_REGISTER_MEM (0x2423| 122) +#define MI_STORE_REGISTER_MEM (0x2423) igt_simple_main { @@ -140,8 +140,9 @@ igt_simple_main sleep(2); for (i = 0; i 1000; i++) { + uint32_t *p; struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 exec; + struct drm_i915_gem_exec_object2 exec[2]; struct drm_i915_gem_relocation_entry reloc[2]; uint32_t b[] = { MI_LOAD_REGISTER_IMM | 1, @@ -149,54 +150,56 @@ igt_simple_main 2 16 | 2, MI_STORE_REGISTER_MEM | 1, FORCEWAKE_MT, - 5*sizeof(uint32_t), + 0, // to be patched MI_LOAD_REGISTER_IMM | 1, FORCEWAKE_MT, 2 16, MI_STORE_REGISTER_MEM | 1, FORCEWAKE_MT, - 11*sizeof(uint32_t), + 1 * sizeof(uint32_t), // to be patched MI_BATCH_BUFFER_END, 0 }; - memset(exec, 0, sizeof(exec)); - exec.handle = gem_create(t[0].fd, 4096); - exec.relocation_count = 2; - exec.relocs_ptr = (uintptr_t)reloc; - 
//exec.flags = EXEC_OBJECT_NEEDS_GTT; - gem_write(t[0].fd, exec.handle, 0, b, sizeof(b)); + memset(exec, 0, sizeof(exec)); + exec[1].handle = gem_create(t[0].fd, 4096); + exec[1].relocation_count = 2; + exec[1].relocs_ptr = (uintptr_t)reloc; + gem_write(t[0].fd, exec[1].handle, 0, b, sizeof(b)); + exec[0].handle = gem_create(t[0].fd, 4096); reloc[0].offset = 5 * sizeof(uint32_t); - reloc[0].delta = 5 * sizeof(uint32_t); - reloc[0].target_handle = exec.handle; - reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION; - reloc[0].write_domain = 0; + reloc[0].delta = 0; + reloc[0].target_handle = exec[0].handle; + reloc[0].read_domains = I915_GEM_DOMAIN_RENDER; + reloc[0].write_domain = I915_GEM_DOMAIN_RENDER; reloc[0].presumed_offset = 0; reloc[1].offset = 11 * sizeof(uint32_t); - reloc[1].delta = 11 * sizeof(uint32_t); - reloc[1].target_handle = exec.handle; - reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION; - reloc[1].write_domain = 0; + reloc[1].delta = 1 * sizeof(uint32_t); + reloc[1].target_handle = exec[0].handle; + reloc[1].read_domains = I915_GEM_DOMAIN_RENDER; + reloc[1].write_domain = I915_GEM_DOMAIN_RENDER; reloc[1].presumed_offset = 0; memset(execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)exec; - execbuf.buffer_count = 1; + execbuf.buffer_count = 2; execbuf.batch_len = sizeof(b); execbuf.flags = I915_EXEC_SECURE; gem_execbuf(t[0].fd, execbuf); - gem_sync(t[0].fd, exec.handle); - gem_read(t[0].fd, exec.handle, 0, b, sizeof(b)); - gem_close(t[0].fd, exec.handle); + gem_sync(t[0].fd, exec[1].handle); - printf([%d]={ %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x }\n, - i, b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9], b[10], b[11], b[12]); + p
[Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser
From: Brad Volkin bradley.d.vol...@intel.com For clients that submit large batch buffers the command parser has a substantial impact on performance. On my HSW ULT system performance drops by as much as ~20% on some tests. Most of the time is spent in the command lookup code. Converting that from the current naive search to a hash table lookup reduces the performance drop to ~10%. The choice of value for I915_CMD_HASH_ORDER allows all commands currently used in the parser tables to hash to their own bucket (except for one collision on the render ring). The tradeoff is that it wastes memory. Because the opcodes for the commands in the tables are not particularly well distributed, reducing the order still leaves many buckets empty. The increased collisions don't seem to have a huge impact on the performance gain, but for now anyhow, the parser trades memory for performance. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 136 drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++- 4 files changed, 116 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 9bac097..9dca899 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_ring_buffer *ring) +static bool validate_cmds_sorted(struct intel_ring_buffer *ring, +const struct drm_i915_cmd_table *cmd_tables, +int cmd_table_count) { int i; bool ret = true; - if (!ring-cmd_tables || ring-cmd_table_count == 0) + if (!cmd_tables || cmd_table_count == 0) return true; - for (i = 0; i ring-cmd_table_count; i++) { - const struct drm_i915_cmd_table *table = ring-cmd_tables[i]; + for (i = 0; i cmd_table_count; i++) { + const struct 
drm_i915_cmd_table *table = cmd_tables[i]; u32 previous = 0; int j; @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) ring-master_reg_count); } +struct cmd_node { + const struct drm_i915_cmd_descriptor *desc; + struct hlist_node node; +}; + +/* + * Different command ranges have different numbers of bits for the opcode. + * In order to use the opcode bits, and only the opcode bits, for the hash key + * we should use the MI_* command opcode mask (since those commands use the + * fewest bits for the opcode.) + */ +#define CMD_HASH_MASK STD_MI_OPCODE_MASK + +static int init_hash_table(struct intel_ring_buffer *ring, + const struct drm_i915_cmd_table *cmd_tables, + int cmd_table_count) +{ + int i, j; + + hash_init(ring-cmd_hash); + + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; + + for (j = 0; j table-count; j++) { + const struct drm_i915_cmd_descriptor *desc = + table-table[j]; + struct cmd_node *desc_node = + kmalloc(sizeof(*desc_node), GFP_KERNEL); + + if (!desc_node) + return -ENOMEM; + + desc_node-desc = desc; + hash_add(ring-cmd_hash, desc_node-node, +desc-cmd.value CMD_HASH_MASK); + } + } + + return 0; +} + +static void fini_hash_table(struct intel_ring_buffer *ring) +{ + struct hlist_node *tmp; + struct cmd_node *desc_node; + int i; + + hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) { + hash_del(desc_node-node); + kfree(desc_node); + } +} + /** * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer * @ring: the ringbuffer to initialize @@ -567,18 +623,21 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) */ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) { + const struct drm_i915_cmd_table *cmd_tables; + int cmd_table_count; + if (!IS_GEN7(ring-dev)) return; switch (ring-id) { case RCS: if (IS_HASWELL(ring-dev)) { - ring-cmd_tables = hsw_render_ring_cmds; - ring-cmd_table_count = + cmd_tables = 
hsw_render_ring_cmds; + cmd_table_count = ARRAY_SIZE(hsw_render_ring_cmds); } else { -
[Intel-gfx] [PATCH] SQUASH: drm/i915: One more register for mesa
From: Brad Volkin bradley.d.vol...@intel.com Originally left out because it wasn't used. But it may be needed and doesn't pose any risk, so add to the whitelist. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 1 + drivers/gpu/drm/i915/i915_reg.h| 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 3486ef7..9bac097 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -408,6 +408,7 @@ static const u32 gen7_render_regs[] = { REG64(PS_INVOCATION_COUNT), REG64(PS_DEPTH_COUNT), OACONTROL, /* Only allowed for LRI and SRM. See below. */ + GEN7_3DPRIM_END_OFFSET, GEN7_3DPRIM_START_VERTEX, GEN7_3DPRIM_VERTEX_COUNT, GEN7_3DPRIM_INSTANCE_COUNT, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f49569b..46ea233 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -430,6 +430,7 @@ #define GEN7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8) +#define GEN7_3DPRIM_END_OFFSET 0x2420 #define GEN7_3DPRIM_START_VERTEX0x2430 #define GEN7_3DPRIM_VERTEX_COUNT0x2434 #define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] tests/gen7_forcewake_mt: Don't set the GGTT bit in SRM command
From: Brad Volkin bradley.d.vol...@intel.com The command parser in newer kernels will reject it and setting this bit is not required for the actual test case. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76670 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gen7_forcewake_mt.c | 55 +-- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/tests/gen7_forcewake_mt.c b/tests/gen7_forcewake_mt.c index fdc34ce..3afd80a 100644 --- a/tests/gen7_forcewake_mt.c +++ b/tests/gen7_forcewake_mt.c @@ -121,7 +121,7 @@ static void *thread(void *arg) } #define MI_LOAD_REGISTER_IMM(0x2223) -#define MI_STORE_REGISTER_MEM (0x2423| 122) +#define MI_STORE_REGISTER_MEM (0x2423) igt_simple_main { @@ -140,8 +140,9 @@ igt_simple_main sleep(2); for (i = 0; i 1000; i++) { + uint32_t *p; struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 exec; + struct drm_i915_gem_exec_object2 exec[2]; struct drm_i915_gem_relocation_entry reloc[2]; uint32_t b[] = { MI_LOAD_REGISTER_IMM | 1, @@ -149,54 +150,56 @@ igt_simple_main 2 16 | 2, MI_STORE_REGISTER_MEM | 1, FORCEWAKE_MT, - 5*sizeof(uint32_t), + 0, // to be patched MI_LOAD_REGISTER_IMM | 1, FORCEWAKE_MT, 2 16, MI_STORE_REGISTER_MEM | 1, FORCEWAKE_MT, - 11*sizeof(uint32_t), + 1 * sizeof(uint32_t), // to be patched MI_BATCH_BUFFER_END, 0 }; - memset(exec, 0, sizeof(exec)); - exec.handle = gem_create(t[0].fd, 4096); - exec.relocation_count = 2; - exec.relocs_ptr = (uintptr_t)reloc; - //exec.flags = EXEC_OBJECT_NEEDS_GTT; - gem_write(t[0].fd, exec.handle, 0, b, sizeof(b)); + memset(exec, 0, sizeof(exec)); + exec[1].handle = gem_create(t[0].fd, 4096); + exec[1].relocation_count = 2; + exec[1].relocs_ptr = (uintptr_t)reloc; + gem_write(t[0].fd, exec[1].handle, 0, b, sizeof(b)); + exec[0].handle = gem_create(t[0].fd, 4096); reloc[0].offset = 5 * sizeof(uint32_t); - reloc[0].delta = 5 * sizeof(uint32_t); - reloc[0].target_handle = exec.handle; - reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION; - 
reloc[0].write_domain = 0; + reloc[0].delta = 0; + reloc[0].target_handle = exec[0].handle; + reloc[0].read_domains = I915_GEM_DOMAIN_RENDER; + reloc[0].write_domain = I915_GEM_DOMAIN_RENDER; reloc[0].presumed_offset = 0; reloc[1].offset = 11 * sizeof(uint32_t); - reloc[1].delta = 11 * sizeof(uint32_t); - reloc[1].target_handle = exec.handle; - reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION; - reloc[1].write_domain = 0; + reloc[1].delta = 1 * sizeof(uint32_t); + reloc[1].target_handle = exec[0].handle; + reloc[1].read_domains = I915_GEM_DOMAIN_RENDER; + reloc[1].write_domain = I915_GEM_DOMAIN_RENDER; reloc[1].presumed_offset = 0; memset(execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)exec; - execbuf.buffer_count = 1; + execbuf.buffer_count = 2; execbuf.batch_len = sizeof(b); execbuf.flags = I915_EXEC_SECURE; gem_execbuf(t[0].fd, execbuf); - gem_sync(t[0].fd, exec.handle); - gem_read(t[0].fd, exec.handle, 0, b, sizeof(b)); - gem_close(t[0].fd, exec.handle); + gem_sync(t[0].fd, exec[1].handle); - printf([%d]={ %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x }\n, - i, b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9], b[10], b[11], b[12]); + p = gem_mmap(t[0].fd, exec[0].handle, 4096, PROT_READ); - igt_assert(b[5] 2); - igt_assert((b[11] 2) == 0); + printf([%d]={ %08x %08x }\n, i, p[0], p[1]); + igt_assert(p[0] 2); + igt_assert((p[1] 2) == 0); + + munmap(p, 4096); + gem_close(t[0].fd, exec[0].handle); + gem_close(t[0].fd, exec[1].handle); usleep(1000); } -- 1.8.3.2 ___
[Intel-gfx] [PATCH] drm/i915: Add more registers to the whitelist for mesa
From: Brad Volkin bradley.d.vol...@intel.com These are additional registers needed for performance monitoring and ARB_draw_indirect extensions in mesa. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76719 Cc: Kenneth Graunke kenn...@whitecape.org Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 9 + drivers/gpu/drm/i915/i915_reg.h| 8 2 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 29184d6..3486ef7 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -408,10 +408,19 @@ static const u32 gen7_render_regs[] = { REG64(PS_INVOCATION_COUNT), REG64(PS_DEPTH_COUNT), OACONTROL, /* Only allowed for LRI and SRM. See below. */ + GEN7_3DPRIM_START_VERTEX, + GEN7_3DPRIM_VERTEX_COUNT, + GEN7_3DPRIM_INSTANCE_COUNT, + GEN7_3DPRIM_START_INSTANCE, + GEN7_3DPRIM_BASE_VERTEX, REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(3)), + REG64(GEN7_SO_PRIM_STORAGE_NEEDED(0)), + REG64(GEN7_SO_PRIM_STORAGE_NEEDED(1)), + REG64(GEN7_SO_PRIM_STORAGE_NEEDED(2)), + REG64(GEN7_SO_PRIM_STORAGE_NEEDED(3)), GEN7_SO_WRITE_OFFSET(0), GEN7_SO_WRITE_OFFSET(1), GEN7_SO_WRITE_OFFSET(2), diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8e60737..533ec0a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -427,6 +427,14 @@ /* There are the 4 64-bit counter registers, one for each stream output */ #define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8) +#define GEN7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8) + +#define GEN7_3DPRIM_START_VERTEX0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + #define OACONTROL 0x2360 #define 
_GEN7_PIPEA_DE_LOAD_SL 0x70068 -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 3/3] drm/i915: Track OACONTROL register enable/disable during parsing
From: Brad Volkin bradley.d.vol...@intel.com There is some thought that the data from the performance counters enabled via OACONTROL should only be available to the process that enabled counting. To limit snooping, require that any batch buffer which sets OACONTROL to a non-zero value also sets it back to 0 before the end of the batch. This requires limiting OACONTROL writes to happen via MI_LOAD_REGISTER_IMM so that we can access the value being written. This should be in line with the expected use case for writing OACONTROL. v2: Drop an unnecessary '? true : false' Cc: Kenneth Graunke kenn...@whitecape.org Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 35 ++ 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 2eb2aca..34e2d45 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -407,12 +407,7 @@ static const u32 gen7_render_regs[] = { REG64(CL_PRIMITIVES_COUNT), REG64(PS_INVOCATION_COUNT), REG64(PS_DEPTH_COUNT), - /* -* FIXME: This is just to keep mesa working for now, we need to check -* that mesa resets this again and that it doesn't use any of the -* special modes which write into the gtt. -*/ - OACONTROL, + OACONTROL, /* Only allowed for LRI and SRM. See below. 
*/ REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)), @@ -761,7 +756,8 @@ bool i915_needs_cmd_parser(struct intel_ring_buffer *ring) static bool check_cmd(const struct intel_ring_buffer *ring, const struct drm_i915_cmd_descriptor *desc, const u32 *cmd, - const bool is_master) + const bool is_master, + bool *oacontrol_set) { if (desc-flags CMD_DESC_REJECT) { DRM_DEBUG_DRIVER(CMD: Rejected command: 0x%08X\n, *cmd); @@ -777,6 +773,23 @@ static bool check_cmd(const struct intel_ring_buffer *ring, if (desc-flags CMD_DESC_REGISTER) { u32 reg_addr = cmd[desc-reg.offset] desc-reg.mask; + /* +* OACONTROL requires some special handling for writes. We +* want to make sure that any batch which enables OA also +* disables it before the end of the batch. The goal is to +* prevent one process from snooping on the perf data from +* another process. To do that, we need to check the value +* that will be written to the register. Hence, limit +* OACONTROL writes to only MI_LOAD_REGISTER_IMM commands. +*/ + if (reg_addr == OACONTROL) { + if (desc-cmd.value == MI_LOAD_REGISTER_MEM) + return false; + + if (desc-cmd.value == MI_LOAD_REGISTER_IMM(1)) + *oacontrol_set = (cmd[2] != 0); + } + if (!valid_reg(ring-reg_table, ring-reg_count, reg_addr)) { if (!is_master || @@ -851,6 +864,7 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, u32 *cmd, *batch_base, *batch_end; struct drm_i915_cmd_descriptor default_desc = { 0 }; int needs_clflush = 0; + bool oacontrol_set = false; /* OACONTROL tracking. 
See check_cmd() */ ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush); if (ret) { @@ -900,7 +914,7 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, break; } - if (!check_cmd(ring, desc, cmd, is_master)) { + if (!check_cmd(ring, desc, cmd, is_master, &oacontrol_set)) { ret = -EINVAL; break; } @@ -908,6 +922,11 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, cmd += length; } + if (oacontrol_set) { + DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n"); + ret = -EINVAL; + } + if (cmd >= batch_end) { DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); ret = -EINVAL; -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/3] drm/i915: Refactor cmd parser checks into a function
From: Brad Volkin bradley.d.vol...@intel.com This brings the code a little more in line with kernel coding style. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 136 + 1 file changed, 71 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 8a93db3..2eb2aca 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -758,6 +758,76 @@ bool i915_needs_cmd_parser(struct intel_ring_buffer *ring) return (i915.enable_cmd_parser == 1); } +static bool check_cmd(const struct intel_ring_buffer *ring, + const struct drm_i915_cmd_descriptor *desc, + const u32 *cmd, + const bool is_master) +{ + if (desc-flags CMD_DESC_REJECT) { + DRM_DEBUG_DRIVER(CMD: Rejected command: 0x%08X\n, *cmd); + return false; + } + + if ((desc-flags CMD_DESC_MASTER) !is_master) { + DRM_DEBUG_DRIVER(CMD: Rejected master-only command: 0x%08X\n, +*cmd); + return false; + } + + if (desc-flags CMD_DESC_REGISTER) { + u32 reg_addr = cmd[desc-reg.offset] desc-reg.mask; + + if (!valid_reg(ring-reg_table, + ring-reg_count, reg_addr)) { + if (!is_master || + !valid_reg(ring-master_reg_table, + ring-master_reg_count, + reg_addr)) { + DRM_DEBUG_DRIVER(CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n, +reg_addr, +*cmd, +ring-id); + return false; + } + } + } + + if (desc-flags CMD_DESC_BITMASK) { + int i; + + for (i = 0; i MAX_CMD_DESC_BITMASKS; i++) { + u32 dword; + + if (desc-bits[i].mask == 0) + break; + + if (desc-bits[i].condition_mask != 0) { + u32 offset = + desc-bits[i].condition_offset; + u32 condition = cmd[offset] + desc-bits[i].condition_mask; + + if (condition == 0) + continue; + } + + dword = cmd[desc-bits[i].offset] + desc-bits[i].mask; + + if (dword != desc-bits[i].expected) { + DRM_DEBUG_DRIVER(CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n, +*cmd, +desc-bits[i].mask, 
+desc-bits[i].expected, +dword, ring-id); + return false; + } + } + } + + return true; +} + #define LENGTH_BIAS 2 /** @@ -830,75 +900,11 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, break; } - if (desc-flags CMD_DESC_REJECT) { - DRM_DEBUG_DRIVER(CMD: Rejected command: 0x%08X\n, *cmd); + if (!check_cmd(ring, desc, cmd, is_master)) { ret = -EINVAL; break; } - if ((desc-flags CMD_DESC_MASTER) !is_master) { - DRM_DEBUG_DRIVER(CMD: Rejected master-only command: 0x%08X\n, -*cmd); - ret = -EINVAL; - break; - } - - if (desc-flags CMD_DESC_REGISTER) { - u32 reg_addr = cmd[desc-reg.offset] desc-reg.mask; - - if (!valid_reg(ring-reg_table, - ring-reg_count, reg_addr)) { - if (!is_master || - !valid_reg(ring-master_reg_table, - ring-master_reg_count, - reg_addr)) { - DRM_DEBUG_DRIVER(CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n, -reg_addr, -*cmd, -
[Intel-gfx] [PATCH 3/3] drm/i915: Track OACONTROL register enable/disable during parsing
From: Brad Volkin bradley.d.vol...@intel.com There is some thought that the data from the performance counters enabled via OACONTROL should only be available to the process that enabled counting. To limit snooping, require that any batch buffer which sets OACONTROL to a non-zero value also sets it back to 0 before the end of the batch. This requires limiting OACONTROL writes to happen via MI_LOAD_REGISTER_IMM so that we can access the value being written. This should be in line with the expected use case for writing OACONTROL. Cc: Kenneth Graunke kenn...@whitecape.org Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 35 ++ 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 2eb2aca..779e14c 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -407,12 +407,7 @@ static const u32 gen7_render_regs[] = { REG64(CL_PRIMITIVES_COUNT), REG64(PS_INVOCATION_COUNT), REG64(PS_DEPTH_COUNT), - /* -* FIXME: This is just to keep mesa working for now, we need to check -* that mesa resets this again and that it doesn't use any of the -* special modes which write into the gtt. -*/ - OACONTROL, + OACONTROL, /* Only allowed for LRI and SRM. See below. 
*/ REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)), @@ -761,7 +756,8 @@ bool i915_needs_cmd_parser(struct intel_ring_buffer *ring) static bool check_cmd(const struct intel_ring_buffer *ring, const struct drm_i915_cmd_descriptor *desc, const u32 *cmd, - const bool is_master) + const bool is_master, + bool *oacontrol_set) { if (desc-flags CMD_DESC_REJECT) { DRM_DEBUG_DRIVER(CMD: Rejected command: 0x%08X\n, *cmd); @@ -777,6 +773,23 @@ static bool check_cmd(const struct intel_ring_buffer *ring, if (desc-flags CMD_DESC_REGISTER) { u32 reg_addr = cmd[desc-reg.offset] desc-reg.mask; + /* +* OACONTROL requires some special handling for writes. We +* want to make sure that any batch which enables OA also +* disables it before the end of the batch. The goal is to +* prevent one process from snooping on the perf data from +* another process. To do that, we need to check the value +* that will be written to the register. Hence, limit +* OACONTROL writes to only MI_LOAD_REGISTER_IMM commands. +*/ + if (reg_addr == OACONTROL) { + if (desc-cmd.value == MI_LOAD_REGISTER_MEM) + return false; + + if (desc-cmd.value == MI_LOAD_REGISTER_IMM(1)) + *oacontrol_set = (cmd[2] != 0) ? true : false; + } + if (!valid_reg(ring-reg_table, ring-reg_count, reg_addr)) { if (!is_master || @@ -851,6 +864,7 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, u32 *cmd, *batch_base, *batch_end; struct drm_i915_cmd_descriptor default_desc = { 0 }; int needs_clflush = 0; + bool oacontrol_set = false; /* OACONTROL tracking. 
See check_cmd() */ ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush); if (ret) { @@ -900,7 +914,7 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, break; } - if (!check_cmd(ring, desc, cmd, is_master)) { + if (!check_cmd(ring, desc, cmd, is_master, &oacontrol_set)) { ret = -EINVAL; break; } @@ -908,6 +922,11 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, cmd += length; } + if (oacontrol_set) { + DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n"); + ret = -EINVAL; + } + if (cmd >= batch_end) { DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); ret = -EINVAL; -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 0/3] Fix up cmd parser OACONTROL handling + refactorings
From: Brad Volkin bradley.d.vol...@intel.com Patches 1 and 2 do some cleanups suggested as part of the review process. Patch 3 continues the OACONTROL handling fixes from the other day. I think patches 1 and 2 are valuable on their own. I think the need/benefit for the tracking provided by patch 3 is somewhat unclear. Per Ken: "I don't really buy the snooping problem, though...just because I leave OACONTROL set doesn't mean I'll get useful data. Another context might clobber it, and empirically the numbers seem to reset across RC6 anyway. So in actuality, they're likely to get bogus data. Even if they did somehow miraculously get decent values, it basically gives information akin to 'top', which is unprivileged on every system I've ever used." That argument makes sense to me, but I've gone ahead and written the patch in the event that we do want it. Brad Volkin (3): drm/i915: BUG_ON() when cmd/reg tables are not sorted drm/i915: Refactor cmd parser checks into a function drm/i915: Track OACONTROL register enable/disable during parsing drivers/gpu/drm/i915/i915_cmd_parser.c | 198 +++-- 1 file changed, 117 insertions(+), 81 deletions(-) -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/3] drm/i915: BUG_ON() when cmd/reg tables are not sorted
From: Brad Volkin bradley.d.vol...@intel.com As suggested during review, this makes it much more obvious when the tables are not sorted. Cc: Jani Nikula jani.nik...@linux.intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 31 +-- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 788bd96..8a93db3 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -493,12 +493,13 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static void validate_cmds_sorted(struct intel_ring_buffer *ring) +static bool validate_cmds_sorted(struct intel_ring_buffer *ring) { int i; + bool ret = true; if (!ring-cmd_tables || ring-cmd_table_count == 0) - return; + return true; for (i = 0; i ring-cmd_table_count; i++) { const struct drm_i915_cmd_table *table = ring-cmd_tables[i]; @@ -510,35 +511,45 @@ static void validate_cmds_sorted(struct intel_ring_buffer *ring) table-table[i]; u32 curr = desc-cmd.value desc-cmd.mask; - if (curr previous) + if (curr previous) { DRM_ERROR(CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n, ring-id, i, j, curr, previous); + ret = false; + } previous = curr; } } + + return ret; } -static void check_sorted(int ring_id, const u32 *reg_table, int reg_count) +static bool check_sorted(int ring_id, const u32 *reg_table, int reg_count) { int i; u32 previous = 0; + bool ret = true; for (i = 0; i reg_count; i++) { u32 curr = reg_table[i]; - if (curr previous) + if (curr previous) { DRM_ERROR(CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n, ring_id, i, curr, previous); + ret = false; + } previous = curr; } + + return ret; } -static void validate_regs_sorted(struct intel_ring_buffer *ring) +static bool validate_regs_sorted(struct intel_ring_buffer *ring) { - check_sorted(ring-id, ring-reg_table, ring-reg_count); - 
check_sorted(ring-id, ring-master_reg_table, ring-master_reg_count); + return check_sorted(ring-id, ring-reg_table, ring-reg_count) + check_sorted(ring-id, ring-master_reg_table, +ring-master_reg_count); } /** @@ -617,8 +628,8 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) BUG(); } - validate_cmds_sorted(ring); - validate_regs_sorted(ring); + BUG_ON(!validate_cmds_sorted(ring)); + BUG_ON(!validate_regs_sorted(ring)); } static const struct drm_i915_cmd_descriptor* -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] tests/gem_exec_parse: Test for OACONTROL tracking
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 48 1 file changed, 48 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index 34d097d..853eb57 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -204,6 +204,8 @@ int fd; #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_LRI_POST_OP (1<<23) +#define OACONTROL 0x2360 + igt_main { igt_fixture { @@ -337,6 +339,52 @@ igt_main 0)); } + igt_subtest("oacontrol-tracking") { + uint32_t lri_ok[] = { + MI_LOAD_REGISTER_IMM, + OACONTROL, + 0x31337000, + MI_LOAD_REGISTER_IMM, + OACONTROL, + 0x0, + MI_BATCH_BUFFER_END, + 0 + }; + uint32_t lri_bad[] = { + MI_LOAD_REGISTER_IMM, + OACONTROL, + 0x31337000, + MI_BATCH_BUFFER_END, + }; + uint32_t lri_extra_bad[] = { + MI_LOAD_REGISTER_IMM, + OACONTROL, + 0x31337000, + MI_LOAD_REGISTER_IMM, + OACONTROL, + 0x0, + MI_LOAD_REGISTER_IMM, + OACONTROL, + 0x31337000, + MI_BATCH_BUFFER_END, + }; + igt_assert( + exec_batch(fd, handle, + lri_ok, sizeof(lri_ok), + I915_EXEC_RENDER, + 0)); + igt_assert( + exec_batch(fd, handle, + lri_bad, sizeof(lri_bad), + I915_EXEC_RENDER, + -EINVAL)); + igt_assert( + exec_batch(fd, handle, + lri_extra_bad, sizeof(lri_extra_bad), + I915_EXEC_RENDER, + -EINVAL)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 07/13] drm/i915: Add register whitelist for DRM master
From: Brad Volkin bradley.d.vol...@intel.com These are used to implement scanline waits in the X server. v2: Use #defines instead of magic numbers Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 29 + drivers/gpu/drm/i915/i915_reg.h| 6 ++ 2 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 4347a30..353e5cf 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -281,6 +281,19 @@ static const u32 gen7_blt_regs[] = { BCS_SWCTRL, }; +static const u32 ivb_master_regs[] = { + FORCEWAKE_MT, + DERRMR, + GEN7_PIPE_DE_LOAD_SL(PIPE_A), + GEN7_PIPE_DE_LOAD_SL(PIPE_B), + GEN7_PIPE_DE_LOAD_SL(PIPE_C), +}; + +static const u32 hsw_master_regs[] = { + FORCEWAKE_MT, + DERRMR, +}; + #undef REG64 static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) @@ -409,6 +422,14 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) ring-reg_table = gen7_render_regs; ring-reg_count = ARRAY_SIZE(gen7_render_regs); + if (IS_HASWELL(ring-dev)) { + ring-master_reg_table = hsw_master_regs; + ring-master_reg_count = ARRAY_SIZE(hsw_master_regs); + } else { + ring-master_reg_table = ivb_master_regs; + ring-master_reg_count = ARRAY_SIZE(ivb_master_regs); + } + ring-get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VCS: @@ -428,6 +449,14 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) ring-reg_table = gen7_blt_regs; ring-reg_count = ARRAY_SIZE(gen7_blt_regs); + if (IS_HASWELL(ring-dev)) { + ring-master_reg_table = hsw_master_regs; + ring-master_reg_count = ARRAY_SIZE(hsw_master_regs); + } else { + ring-master_reg_table = ivb_master_regs; + ring-master_reg_count = ARRAY_SIZE(ivb_master_regs); + } + ring-get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VECS: diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1f2aeba..87523df 100644 --- 
a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -415,6 +415,12 @@ /* There are the 4 64-bit counter registers, one for each stream output */ #define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8) +#define _GEN7_PIPEA_DE_LOAD_SL 0x70068 +#define _GEN7_PIPEB_DE_LOAD_SL 0x71068 +#define GEN7_PIPE_DE_LOAD_SL(pipe) _PIPE(pipe, \ +_GEN7_PIPEA_DE_LOAD_SL, \ +_GEN7_PIPEB_DE_LOAD_SL) + /* * Reset registers */ -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 09/13] drm/i915: Reject commands that explicitly generate interrupts
From: Brad Volkin bradley.d.vol...@intel.com The driver leaves most interrupts masked during normal operation, so there would have to be additional work to enable userspace to safely request/receive an interrupt. v2: trailing commas, rebased Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 22 -- drivers/gpu/drm/i915/i915_reg.h| 1 + 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 4f14a24..0351df1 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -115,7 +115,7 @@ -- */ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_NOOP, SMI,F, 1, S ), - CMD( MI_USER_INTERRUPT,SMI,F, 1, S ), + CMD( MI_USER_INTERRUPT,SMI,F, 1, R ), CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, M ), CMD( MI_ARB_CHECK, SMI,F, 1, S ), CMD( MI_REPORT_HEAD, SMI,F, 1, S ), @@ -156,7 +156,7 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( GFX_OP_PIPE_CONTROL(5), S3D, !F, 0xFF, B, .bits = {{ .offset = 1, - .mask = PIPE_CONTROL_MMIO_WRITE, + .mask = (PIPE_CONTROL_MMIO_WRITE | PIPE_CONTROL_NOTIFY), .expected = 0, }}, ), }; @@ -186,6 +186,12 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, S ), CMD( MI_UPDATE_GTT,SMI, !F, 0x3F, R ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, B, + .bits = {{ + .offset = 0, + .mask = MI_FLUSH_DW_NOTIFY, + .expected = 0, + }}, ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), /* * MFX_WAIT doesn't fit the way we handle length for most commands. 
@@ -199,6 +205,12 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, S ), CMD( MI_UPDATE_GTT,SMI, !F, 0x3F, R ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, B, + .bits = {{ + .offset = 0, + .mask = MI_FLUSH_DW_NOTIFY, + .expected = 0, + }}, ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), }; @@ -206,6 +218,12 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), CMD( MI_UPDATE_GTT,SMI, !F, 0x3F, R ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, B, + .bits = {{ + .offset = 0, + .mask = MI_FLUSH_DW_NOTIFY, + .expected = 0, + }}, ), CMD( COLOR_BLT,S2D, !F, 0x3F, S ), CMD( SRC_COPY_BLT, S2D, !F, 0x3F, S ), }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 11cca96..e6dd7e9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -269,6 +269,7 @@ #define MI_FLUSH_DW_STORE_INDEX (1<<21) #define MI_INVALIDATE_TLB (1<<18) #define MI_FLUSH_DW_OP_STOREDW (1<<14) +#define MI_FLUSH_DW_NOTIFY (1<<8) #define MI_INVALIDATE_BSD (1<<7) #define MI_FLUSH_DW_USE_GTT (1<<2) #define MI_FLUSH_DW_USE_PPGTT (0<<2) -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 06/13] drm/i915: Add register whitelists for mesa
From: Brad Volkin bradley.d.vol...@intel.com These registers are currently used by mesa for blitting, transform feedback extensions, and performance monitoring extensions. v2: REG64 macro Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 45 ++ drivers/gpu/drm/i915/i915_reg.h| 20 +++ 2 files changed, 65 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index cf03ba6..4347a30 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -244,6 +244,45 @@ static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, }; +/* + * Register whitelists, sorted by increasing register offset. + * + * Some registers that userspace accesses are 64 bits. The register + * access commands only allow 32-bit accesses. Hence, we have to include + * entries for both halves of the 64-bit registers. + */ + +/* Convenience macro for adding 64-bit registers */ +#define REG64(addr) (addr), (addr + sizeof(u32)) + +static const u32 gen7_render_regs[] = { + REG64(HS_INVOCATION_COUNT), + REG64(DS_INVOCATION_COUNT), + REG64(IA_VERTICES_COUNT), + REG64(IA_PRIMITIVES_COUNT), + REG64(VS_INVOCATION_COUNT), + REG64(GS_INVOCATION_COUNT), + REG64(GS_PRIMITIVES_COUNT), + REG64(CL_INVOCATION_COUNT), + REG64(CL_PRIMITIVES_COUNT), + REG64(PS_INVOCATION_COUNT), + REG64(PS_DEPTH_COUNT), + REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)), + REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)), + REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)), + REG64(GEN7_SO_NUM_PRIMS_WRITTEN(3)), + GEN7_SO_WRITE_OFFSET(0), + GEN7_SO_WRITE_OFFSET(1), + GEN7_SO_WRITE_OFFSET(2), + GEN7_SO_WRITE_OFFSET(3), +}; + +static const u32 gen7_blt_regs[] = { + BCS_SWCTRL, +}; + +#undef REG64 + static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) { u32 client = (cmd_header INSTR_CLIENT_MASK) INSTR_CLIENT_SHIFT; @@ -367,6 +406,9 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring) ring-cmd_table_count = ARRAY_SIZE(gen7_render_cmds); } + ring-reg_table = gen7_render_regs; + ring-reg_count = ARRAY_SIZE(gen7_render_regs); + ring-get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VCS: @@ -383,6 +425,9 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) ring-cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); } + ring-reg_table = gen7_blt_regs; + ring-reg_count = ARRAY_SIZE(gen7_blt_regs); + ring-get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VECS: diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 23be06a..1f2aeba 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -396,6 +396,26 @@ #define SRC_COPY_BLT ((0x229)|(0x4322)) /* + * Registers used only by the command parser + */ +#define BCS_SWCTRL 0x22200 + +#define HS_INVOCATION_COUNT 0x2300 +#define DS_INVOCATION_COUNT 0x2308 +#define IA_VERTICES_COUNT 0x2310 +#define IA_PRIMITIVES_COUNT 0x2318 +#define VS_INVOCATION_COUNT 0x2320 +#define GS_INVOCATION_COUNT 0x2328 +#define GS_PRIMITIVES_COUNT 0x2330 +#define CL_INVOCATION_COUNT 0x2338 +#define CL_PRIMITIVES_COUNT 0x2340 +#define PS_INVOCATION_COUNT 0x2348 +#define PS_DEPTH_COUNT 0x2350 + +/* There are the 4 64-bit counter registers, one for each stream output */ +#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8) + +/* * Reset registers */ #define DEBUG_RESET_I830 0x6070 -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 00/13] Gen7 batch buffer command parser
From: Brad Volkin bradley.d.vol...@intel.com Certain OpenGL features (e.g. transform feedback, performance monitoring) require userspace code to submit batches containing commands such as MI_LOAD_REGISTER_IMM to access various registers. Unfortunately, some generations of the hardware will noop these commands in unsecure batches (which includes all userspace batches submitted via i915) even though the commands may be safe and represent the intended programming model of the device. This series introduces a software command parser similar in operation to the command parsing done in hardware for unsecure batches. However, the software parser allows some operations that would be noop'd by hardware, if the parser determines the operation is safe, and submits the batch as secure to prevent hardware parsing. Currently the series implements this on IVB and HSW. The series has one piece of prep work, one patch for the parser logic, and a handful of patches to fill out the tables which drive the parser. There are follow-up patches to libdrm and to i-g-t. The i-g-t tests are basic and do not test all of the commands used by the parser on the assumption that I'm likely to make the same mistakes in both the parser and the test. I've previously run the i-g-t gem_* tests, the piglit quick tests, and generally used Ubuntu 13.10 IVB and HSW systems with the parser running. Aside from a failure described below, I did not see any regressions. At this point there are a couple of required/potential improvements. 1) Chained batches. The parser currently allows MI_BATCH_BUFFER_START commands in userspace batches without parsing them. The media driver uses chained batches, so a solution is required. I'm still working through the requirements but don't want to continue delaying the review process for what I have so far. 2) Command buffer copy. 
To avoid CPU modifications to buffers after parsing, and to avoid GPU modifications to buffers via EUs or commands in the batch, we should copy the userspace batch buffer to memory that userspace does not have access to, map it into GGTT, and execute that batch buffer. I have a sense of how to do this for 1st-level batches, but it may need changes to tie in with the chained batch parsing, so I've again held off. 3) Coherency. I've previously found a coherency issue on VLV when reading the batch buffer from the CPU during execbuffer2. Userspace writes the batch via pwrite fast path before calling execbuffer2. The parser reads stale data. This works fine on IVB and HSW, so I believe it's an LLC vs. non-LLC issue. It's possible that the shmem pread refactoring fixes this, I just have not been able to retest due to lack of a VLV system. v2: - Significantly reorder series - Scan secure batches (i.e. I915_EXEC_SECURE) - Check that parser tables are sorted during init - Fixed gem_cpu_reloc regression - HAS_CMD_PARSER - CMD_PARSER_VERSION getparam - Additional tests v3: - Don't actually send batches as secure yet - Improved documentation and commenting - Many other small cleanups throughout Brad Volkin (13): drm/i915: Refactor shmem pread setup drm/i915: Implement command buffer parsing logic drm/i915: Initial command parser table definitions drm/i915: Reject privileged commands drm/i915: Allow some privileged commands from master drm/i915: Add register whitelists for mesa drm/i915: Add register whitelist for DRM master drm/i915: Enable register whitelist checks drm/i915: Reject commands that explicitly generate interrupts drm/i915: Enable PPGTT command parser checks drm/i915: Reject commands that would store to global HWS page drm/i915: Add a CMD_PARSER_VERSION getparam drm/i915: Enable command parsing by default drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_cmd_parser.c | 918 + drivers/gpu/drm/i915/i915_dma.c| 3 + drivers/gpu/drm/i915/i915_drv.h| 103 
drivers/gpu/drm/i915/i915_gem.c| 51 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 18 + drivers/gpu/drm/i915/i915_params.c | 5 + drivers/gpu/drm/i915/i915_reg.h| 96 +++ drivers/gpu/drm/i915/intel_ringbuffer.c| 2 + drivers/gpu/drm/i915/intel_ringbuffer.h| 32 + include/uapi/drm/i915_drm.h| 1 + 11 files changed, 1216 insertions(+), 14 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_cmd_parser.c -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 13/13] drm/i915: Enable command parsing by default
From: Brad Volkin bradley.d.vol...@intel.com v2: rebased OTC-Tracker: AXIA-4631 Change-Id: I6747457e1fe7494bd42787af51198fcba398ad78 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index aba0b9b..9e394bc 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -48,7 +48,7 @@ struct i915_params i915 __read_mostly = { .reset = true, .invert_brightness = 0, .disable_display = 0, - .enable_cmd_parser = 0, + .enable_cmd_parser = 1, }; module_param_named(modeset, i915.modeset, int, 0400); @@ -161,4 +161,4 @@ MODULE_PARM_DESC(disable_display, "Disable display (default: false)"); module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600); MODULE_PARM_DESC(enable_cmd_parser, - "Enable command parsing (1=enabled, 0=disabled [default])"); + "Enable command parsing (1=enabled [default], 0=disabled)"); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/6] tests/gem_exec_parse: Add tests for register whitelist
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 26 ++ 1 file changed, 26 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index ebf7116..48fde25 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -141,6 +141,7 @@ int fd; #define MI_ARB_ON_OFF (0x8 << 23) #define MI_DISPLAY_FLIP ((0x14 << 23) | 1) +#define MI_LOAD_REGISTER_IMM ((0x22 << 23) | 1) #define GFX_OP_PIPE_CONTROL ((0x3<<29)|(0x3<<27)|(0x2<<24)|2) #define PIPE_CONTROL_QW_WRITE (1<<14) @@ -213,6 +214,31 @@ igt_main -EINVAL)); } + igt_subtest("registers") { + uint32_t lri_bad[] = { + MI_LOAD_REGISTER_IMM, + 0, // disallowed register address + 0x1200, + MI_BATCH_BUFFER_END, + }; + uint32_t lri_ok[] = { + MI_LOAD_REGISTER_IMM, + 0x5280, // allowed register address (SO_WRITE_OFFSET[0]) + 0x1, + MI_BATCH_BUFFER_END, + }; + igt_assert( + exec_batch(fd, handle, + lri_bad, sizeof(lri_bad), + I915_EXEC_RENDER, + -EINVAL)); + igt_assert( + exec_batch(fd, handle, + lri_ok, sizeof(lri_ok), + I915_EXEC_RENDER, + 0)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 05/13] drm/i915: Allow some privileged commands from master
From: Brad Volkin bradley.d.vol...@intel.com The Intel DDX uses these to implement scanline waits in the X server. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 90bbb6d..cf03ba6 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -116,7 +116,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_NOOP, SMI,F, 1, S ), CMD( MI_USER_INTERRUPT,SMI,F, 1, S ), - CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, R ), + CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, M ), CMD( MI_ARB_CHECK, SMI,F, 1, S ), CMD( MI_REPORT_HEAD, SMI,F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI,F, 1, S ), @@ -151,7 +151,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_RS_CONTROL,SMI,F, 1, S ), CMD( MI_URB_ATOMIC_ALLOC, SMI,F, 1, S ), CMD( MI_RS_CONTEXT,SMI,F, 1, S ), - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, R ), CMD( MI_RS_STORE_DATA_IMM, SMI, !F, 0xFF, S ), @@ -196,7 +196,7 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { }; static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 6/6] tests/gem_exec_parse: Test a command crossing a page boundary
From: Brad Volkin bradley.d.vol...@intel.com This is a speculative test in that it's not particularly relevant today, but is important if we switch the parser implementation to use kmap_atomic instead of vmap. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 68 ++ 1 file changed, 68 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index 004c3bf..455bfbf 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -136,6 +136,60 @@ static int exec_batch(int fd, uint32_t cmd_bo, uint32_t *cmds, return 1; } +static int exec_split_batch(int fd, uint32_t *cmds, + int size, int ring, int expected_ret) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 objs[1]; + uint32_t cmd_bo; + uint32_t noop[1024] = { 0 }; + int ret; + + // Allocate and fill a 2-page batch with noops + cmd_bo = gem_create(fd, 4096 * 2); + gem_write(fd, cmd_bo, 0, noop, sizeof(noop)); + gem_write(fd, cmd_bo, 4096, noop, sizeof(noop)); + + // Write the provided commands such that the first dword + // of the command buffer is the last dword of the first + // page (i.e. the command is split across the two pages). 
+ gem_write(fd, cmd_bo, 4096-sizeof(uint32_t), cmds, size); + + objs[0].handle = cmd_bo; + objs[0].relocation_count = 0; + objs[0].relocs_ptr = 0; + objs[0].alignment = 0; + objs[0].offset = 0; + objs[0].flags = 0; + objs[0].rsvd1 = 0; + objs[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t)objs; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = 0; + execbuf.batch_len = size; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.flags = ring; + i915_execbuffer2_set_context_id(execbuf, 0); + execbuf.rsvd2 = 0; + + ret = drmIoctl(fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + execbuf); + if (ret == 0) + igt_assert(expected_ret == 0); + else + igt_assert(-errno == expected_ret); + + gem_sync(fd, cmd_bo); + gem_close(fd, cmd_bo); + + return 1; +} + uint32_t handle; int fd; @@ -266,6 +320,20 @@ igt_main -EINVAL)); } + igt_subtest(cmd-crossing-page) { + uint32_t lri_ok[] = { + MI_LOAD_REGISTER_IMM, + 0x5280, // allowed register address (SO_WRITE_OFFSET[0]) + 0x1, + MI_BATCH_BUFFER_END, + }; + igt_assert( + exec_split_batch(fd, + lri_ok, sizeof(lri_ok), + I915_EXEC_RENDER, + 0)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/6] tests/gem_exec_parse: Add tests for rejected commands
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 81 ++ 1 file changed, 81 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index c71e478..ebf7116 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -93,9 +93,55 @@ static int exec_batch_patched(int fd, uint32_t cmd_bo, uint32_t *cmds, return 1; } +static int exec_batch(int fd, uint32_t cmd_bo, uint32_t *cmds, + int size, int ring, int expected_ret) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 objs[1]; + int ret; + + gem_write(fd, cmd_bo, 0, cmds, size); + + objs[0].handle = cmd_bo; + objs[0].relocation_count = 0; + objs[0].relocs_ptr = 0; + objs[0].alignment = 0; + objs[0].offset = 0; + objs[0].flags = 0; + objs[0].rsvd1 = 0; + objs[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t)objs; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = 0; + execbuf.batch_len = size; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.flags = ring; + i915_execbuffer2_set_context_id(execbuf, 0); + execbuf.rsvd2 = 0; + + ret = drmIoctl(fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + execbuf); + if (ret == 0) + igt_assert(expected_ret == 0); + else + igt_assert(-errno == expected_ret); + + gem_sync(fd, cmd_bo); + + return 1; +} + uint32_t handle; int fd; +#define MI_ARB_ON_OFF (0x8 23) +#define MI_DISPLAY_FLIP ((0x14 23) | 1) + #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2) #define PIPE_CONTROL_QW_WRITE(114) @@ -132,6 +178,41 @@ igt_main 0x1200)); } + igt_subtest(basic-rejected) { + uint32_t arb_on_off[] = { + MI_ARB_ON_OFF, + MI_BATCH_BUFFER_END, + }; + uint32_t display_flip[] = { + MI_DISPLAY_FLIP, + 0, 0, 0, + MI_BATCH_BUFFER_END, + 0 + }; + igt_assert( + exec_batch(fd, handle, + arb_on_off, sizeof(arb_on_off), + I915_EXEC_RENDER, + -EINVAL)); + igt_assert( + exec_batch(fd, handle, + arb_on_off, sizeof(arb_on_off), 
+ I915_EXEC_BSD, + -EINVAL)); + if (gem_has_vebox(fd)) { + igt_assert( + exec_batch(fd, handle, + arb_on_off, sizeof(arb_on_off), + I915_EXEC_VEBOX, + -EINVAL)); + } + igt_assert( + exec_batch(fd, handle, + display_flip, sizeof(display_flip), + I915_EXEC_BLT, + -EINVAL)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 02/13] drm/i915: Implement command buffer parsing logic
From: Brad Volkin bradley.d.vol...@intel.com The command parser scans batch buffers submitted via execbuffer ioctls before the driver submits them to hardware. At a high level, it looks for several things: 1) Commands which are explicitly defined as privileged or which should only be used by the kernel driver. The parser generally rejects such commands, with the provision that it may allow some from the drm master process. 2) Commands which access registers. To support correct/enhanced userspace functionality, particularly certain OpenGL extensions, the parser provides a whitelist of registers which userspace may safely access (for both normal and drm master processes). 3) Commands which access privileged memory (i.e. GGTT, HWS page, etc). The parser always rejects such commands. See the overview comment in the source for more details. This patch only implements the logic. Subsequent patches will build the tables that drive the parser. v2: Don't set the secure bit if the parser succeeds Fail harder during init Makefile cleanup Kerneldoc cleanup Clarify module param description Convert ints to bools in a few places Move client/subclient defs to i915_reg.h Remove the bits_count field OTC-Tracker: AXIA-4631 Change-Id: I50b98c71c6655893291c78a2d1b8954577b37a30 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_cmd_parser.c | 485 + drivers/gpu/drm/i915/i915_drv.h| 93 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 18 ++ drivers/gpu/drm/i915/i915_params.c | 5 + drivers/gpu/drm/i915/i915_reg.h| 12 + drivers/gpu/drm/i915/intel_ringbuffer.c| 2 + drivers/gpu/drm/i915/intel_ringbuffer.h| 32 ++ 8 files changed, 648 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_cmd_parser.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4850494..3569122 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o 
i915_irq.o \ i915_gem_gtt.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ + i915_cmd_parser.o \ i915_params.o \ i915_sysfs.o \ i915_trace_points.o \ diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c new file mode 100644 index 000..7a5756e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -0,0 +1,485 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Brad Volkin bradley.d.vol...@intel.com + * + */ + +#include i915_drv.h + +/** + * DOC: i915 batch buffer command parser + * + * Motivation: + * Certain OpenGL features (e.g. transform feedback, performance monitoring) + * require userspace code to submit batches containing commands such as + * MI_LOAD_REGISTER_IMM to access various registers. 
Unfortunately, some + * generations of the hardware will noop these commands in unsecure batches + * (which includes all userspace batches submitted via i915) even though the + * commands may be safe and represent the intended programming model of the + * device. + * + * The software command parser is similar in operation to the command parsing + * done in hardware for unsecure batches. However, the software parser allows + * some operations that would be noop'd by hardware, if the parser determines + * the operation is safe, and submits the batch as secure to prevent hardware + * parsing. + * + * Threats: + * At a high level, the hardware (and software) checks attempt to prevent + * granting userspace undue privileges. There are three categories of privilege. + * + * First, commands which are explicitly defined as privileged or which should + * only be used by the kernel driver. The
[Intel-gfx] [PATCH 1/6] tests: Add a test for the command parser
From: Brad Volkin bradley.d.vol...@intel.com Start with a simple testcase that should pass. v2: Switch to I915_PARAM_CMD_PARSER_VERSION Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/.gitignore | 1 + tests/Makefile.sources | 1 + tests/gem_exec_parse.c | 140 + 3 files changed, 142 insertions(+) create mode 100644 tests/gem_exec_parse.c diff --git a/tests/.gitignore b/tests/.gitignore index cb548a8..8b0b790 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -35,6 +35,7 @@ gem_exec_blt gem_exec_faulting_reloc gem_exec_lut_handle gem_exec_nop +gem_exec_parse gem_fd_exhaustion gem_fenced_exec_thrash gem_fence_thrash diff --git a/tests/Makefile.sources b/tests/Makefile.sources index afb2582..2475f7e 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -29,6 +29,7 @@ TESTS_progs_M = \ gem_exec_bad_domains \ gem_exec_faulting_reloc \ gem_exec_nop \ + gem_exec_parse \ gem_fenced_exec_thrash \ gem_fence_thrash \ gem_flink \ diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c new file mode 100644 index 000..c71e478 --- /dev/null +++ b/tests/gem_exec_parse.c @@ -0,0 +1,140 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include stdlib.h +#include stdint.h +#include stdio.h +#include drm.h +#include i915_drm.h +#include drmtest.h + +#ifndef I915_PARAM_CMD_PARSER_VERSION +#define I915_PARAM_CMD_PARSER_VERSION 28 +#endif + +static int exec_batch_patched(int fd, uint32_t cmd_bo, uint32_t *cmds, + int size, int patch_offset, uint64_t expected_value) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 objs[2]; + struct drm_i915_gem_relocation_entry reloc[1]; + + uint32_t target_bo = gem_create(fd, 4096); + uint64_t actual_value = 0; + + gem_write(fd, cmd_bo, 0, cmds, size); + + reloc[0].offset = patch_offset; + reloc[0].delta = 0; + reloc[0].target_handle = target_bo; + reloc[0].read_domains = I915_GEM_DOMAIN_RENDER; + reloc[0].write_domain = I915_GEM_DOMAIN_RENDER; + reloc[0].presumed_offset = 0; + + objs[0].handle = target_bo; + objs[0].relocation_count = 0; + objs[0].relocs_ptr = 0; + objs[0].alignment = 0; + objs[0].offset = 0; + objs[0].flags = 0; + objs[0].rsvd1 = 0; + objs[0].rsvd2 = 0; + + objs[1].handle = cmd_bo; + objs[1].relocation_count = 1; + objs[1].relocs_ptr = (uintptr_t)reloc; + objs[1].alignment = 0; + objs[1].offset = 0; + objs[1].flags = 0; + objs[1].rsvd1 = 0; + objs[1].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t)objs; + execbuf.buffer_count = 2; + execbuf.batch_start_offset = 0; + execbuf.batch_len = size; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.flags = I915_EXEC_RENDER; + 
i915_execbuffer2_set_context_id(execbuf, 0); + execbuf.rsvd2 = 0; + + gem_execbuf(fd, execbuf); + gem_sync(fd, cmd_bo); + + gem_read(fd,target_bo, 0, actual_value, sizeof(actual_value)); + igt_assert(expected_value == actual_value); + + gem_close(fd, target_bo); + + return 1; +} + +uint32_t handle; +int fd; + +#define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2) +#define PIPE_CONTROL_QW_WRITE(114) + +igt_main +{ + igt_fixture { + int parser_version = 0; +drm_i915_getparam_t gp; + int rc; + + fd = drm_open_any(); + + gp.param = I915_PARAM_CMD_PARSER_VERSION; + gp.value = parser_version; + rc = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
[Intel-gfx] [PATCH 4/6] tests/gem_exec_parse: Add tests for bitmask checks
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index 48fde25..9e90408 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -145,6 +145,7 @@ int fd; #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2) #define PIPE_CONTROL_QW_WRITE(114) +#define PIPE_CONTROL_LRI_POST_OP (123) igt_main { @@ -239,6 +240,23 @@ igt_main 0)); } + igt_subtest(bitmasks) { + uint32_t pc[] = { + GFX_OP_PIPE_CONTROL, + (PIPE_CONTROL_QW_WRITE | +PIPE_CONTROL_LRI_POST_OP), + 0, // To be patched + 0x1200, + 0, + MI_BATCH_BUFFER_END, + }; + igt_assert( + exec_batch(fd, handle, + pc, sizeof(pc), + I915_EXEC_RENDER, + -EINVAL)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 03/13] drm/i915: Initial command parser table definitions
From: Brad Volkin bradley.d.vol...@intel.com Add command tables defining irregular length commands for each ring. This requires a few new command opcode definitions. v2: Whitespace adjustment in command definitions, sparse fix for !F OTC-Tracker: AXIA-4631 Change-Id: I064bceb457e15f46928058352afe76d918c58ef5 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 157 + drivers/gpu/drm/i915/i915_reg.h| 46 ++ 2 files changed, 203 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 7a5756e..12241e8 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -86,6 +86,148 @@ * general bitmasking mechanism. */ +#define STD_MI_OPCODE_MASK 0xFF80 +#define STD_3D_OPCODE_MASK 0x +#define STD_2D_OPCODE_MASK 0xFFC0 +#define STD_MFX_OPCODE_MASK 0x + +#define CMD(op, opm, f, lm, fl, ...) \ + { \ + .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ + .cmd = { (op), (opm) }, \ + .length = { (lm) }, \ + __VA_ARGS__ \ + } + +/* Convenience macros to compress the tables */ +#define SMI STD_MI_OPCODE_MASK +#define S3D STD_3D_OPCODE_MASK +#define S2D STD_2D_OPCODE_MASK +#define SMFX STD_MFX_OPCODE_MASK +#define F true +#define S CMD_DESC_SKIP +#define R CMD_DESC_REJECT +#define W CMD_DESC_REGISTER +#define B CMD_DESC_BITMASK +#define M CMD_DESC_MASTER + +/*Command Mask Fixed Len Action + -- */ +static const struct drm_i915_cmd_descriptor common_cmds[] = { + CMD( MI_NOOP, SMI,F, 1, S ), + CMD( MI_USER_INTERRUPT,SMI,F, 1, S ), + CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, S ), + CMD( MI_ARB_CHECK, SMI,F, 1, S ), + CMD( MI_REPORT_HEAD, SMI,F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI,F, 1, S ), + CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, S ), + CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, S ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, S ), + CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, S ), + CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, S ), + CMD( 
MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), +}; + +static const struct drm_i915_cmd_descriptor render_cmds[] = { + CMD( MI_FLUSH, SMI,F, 1, S ), + CMD( MI_ARB_ON_OFF,SMI,F, 1, S ), + CMD( MI_PREDICATE, SMI,F, 1, S ), + CMD( MI_TOPOLOGY_FILTER, SMI,F, 1, S ), + CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, S ), + CMD( MI_SET_CONTEXT, SMI, !F, 0xFF, S ), + CMD( MI_URB_CLEAR, SMI, !F, 0xFF, S ), + CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, S ), + CMD( MI_CLFLUSH, SMI, !F, 0x3FF, S ), + CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), + CMD( GFX_OP_3DSTATE_VF_STATISTICS, S3D,F, 1, S ), + CMD( PIPELINE_SELECT, S3D,F, 1, S ), + CMD( GPGPU_OBJECT, S3D, !F, 0xFF, S ), + CMD( GPGPU_WALKER, S3D, !F, 0xFF, S ), + CMD( GFX_OP_3DSTATE_SO_DECL_LIST, S3D, !F, 0x1FF, S ), +}; + +static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { + CMD( MI_SET_PREDICATE, SMI,F, 1, S ), + CMD( MI_RS_CONTROL,SMI,F, 1, S ), + CMD( MI_URB_ATOMIC_ALLOC, SMI,F, 1, S ), + CMD( MI_RS_CONTEXT,SMI,F, 1, S ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, S ), + CMD( MI_RS_STORE_DATA_IMM, SMI, !F, 0xFF, S ), + CMD( MI_LOAD_URB_MEM, SMI, !F, 0xFF, S ), + CMD( MI_STORE_URB_MEM, SMI, !F, 0xFF, S ), + CMD( GFX_OP_3DSTATE_DX9_CONSTANTF_VS, S3D, !F, 0x7FF, S ), + CMD( GFX_OP_3DSTATE_DX9_CONSTANTF_PS, S3D, !F, 0x7FF, S ), + + CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS, S3D, !F, 0x1FF, S ), + CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS, S3D, !F, 0x1FF, S ), + CMD(
[Intel-gfx] [PATCH] intel: Merge i915_drm.h with cmd parser define
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- include/drm/i915_drm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 2f4eb8c..ba863c4 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -27,7 +27,7 @@ #ifndef _I915_DRM_H_ #define _I915_DRM_H_ -#include drm.h +#include drm/drm.h /* Please note that modifications to all structs defined here are * subject to backwards-compatibility constraints. @@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_NO_RELOC25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 #define I915_PARAM_HAS_WT 27 +#define I915_PARAM_CMD_PARSER_VERSION 28 typedef struct drm_i915_getparam { int param; @@ -721,7 +722,7 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_IS_PINNED(110) -/** Provide a hint to the kernel that the command stream and auxilliary +/** Provide a hint to the kernel that the command stream and auxiliary * state buffers already holds the correct presumed addresses and so the * relocation process may be skipped if no buffers need to be moved in * preparation for the execbuffer. -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 10/13] drm/i915: Enable PPGTT command parser checks
From: Brad Volkin bradley.d.vol...@intel.com Various commands that access memory have a bit to determine whether the graphics address specified in the command should use the GGTT or PPGTT for translation. These checks ensure that the bit indicates PPGTT translation. Most of these checks use the existing bit-checking infrastructure. The PIPE_CONTROL and MI_FLUSH_DW commands, however, are multi-function commands. The GGTT/PPGTT bit is only relevant for certain uses of the command. As such, this change also extends the bit-checking code to include a condition mask and offset. If the condition mask is non-zero then the parser only performs the bit check when the bits specified by the condition mask/offset are also non-zero. NOTE: At this point in the series PPGTT must be enabled for the parser to work correctly. If it's not enabled, userspace will not be setting the PPGTT bits the way the parser requires. VLV is the only platform where this is a problem, so at this point, we disable parsing for VLV. 
v2: whitespace and trailing commas fixes, rebased OTC-Tracker: AXIA-4631 Change-Id: I3f4c76b6734f1956ec47e698230f97d0998ff92b Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 128 ++--- drivers/gpu/drm/i915/i915_drv.h| 6 ++ drivers/gpu/drm/i915/i915_reg.h| 6 ++ 3 files changed, 129 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 0351df1..1528549 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -124,10 +124,20 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007C } ), - CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W, - .reg = { .offset = 1, .mask = 0x007C } ), - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W, - .reg = { .offset = 1, .mask = 0x007C } ), + CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W | B, + .reg = { .offset = 1, .mask = 0x007C }, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0, + }}, ), + CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B, + .reg = { .offset = 1, .mask = 0x007C }, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0, + }}, ), CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), }; @@ -139,9 +149,31 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_SET_CONTEXT, SMI, !F, 0xFF, R ), CMD( MI_URB_CLEAR, SMI, !F, 0xFF, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3F, B, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0, + }}, ), CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, R ), - CMD( MI_CLFLUSH, SMI, !F, 0x3FF, S ), - CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), + CMD( MI_CLFLUSH, SMI, !F, 0x3FF, B, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0, + }}, ), + CMD( MI_REPORT_PERF_COUNT, SMI, 
!F, 0x3F, B, + .bits = {{ + .offset = 1, + .mask = MI_REPORT_PERF_COUNT_GGTT, + .expected = 0, + }}, ), + CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, B, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0, + }}, ), CMD( GFX_OP_3DSTATE_VF_STATISTICS, S3D,F, 1, S ), CMD( PIPELINE_SELECT, S3D,F, 1, S ), CMD( MEDIA_VFE_STATE, S3D, !F, 0x, B, @@ -158,6 +190,13 @@ static const
[Intel-gfx] [PATCH 01/13] drm/i915: Refactor shmem pread setup
From: Brad Volkin bradley.d.vol...@intel.com The command parser is going to need the same synchronization and setup logic, so factor it out for reuse. v2: Add a check that the object is backed by shmem Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_gem.c | 51 ++--- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8c64831..582035b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2097,6 +2097,9 @@ void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); void i915_gem_lastclose(struct drm_device *dev); +int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, + int *needs_clflush); + int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3618bb0..83990cb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -326,6 +326,42 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, return 0; } +/* + * Pins the specified object's pages and synchronizes the object with + * GPU accesses. Sets needs_clflush to non-zero if the caller should + * flush the object from the CPU cache. + */ +int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, + int *needs_clflush) +{ + int ret; + + *needs_clflush = 0; + + if (!obj-base.filp) + return -EINVAL; + + if (!(obj-base.read_domains I915_GEM_DOMAIN_CPU)) { + /* If we're not in the cpu read domain, set ourself into the gtt +* read domain and manually flush cachelines (if required). This +* optimizes for the case when the gpu will dirty the data +* anyway again before the next pread happens. 
*/ + *needs_clflush = !cpu_cache_is_coherent(obj-base.dev, + obj-cache_level); + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; + } + + ret = i915_gem_object_get_pages(obj); + if (ret) + return ret; + + i915_gem_object_pin_pages(obj); + + return ret; +} + /* Per-page copy function for the shmem pread fastpath. * Flushes invalid cachelines before reading the target if * needs_clflush is set. */ @@ -423,23 +459,10 @@ i915_gem_shmem_pread(struct drm_device *dev, obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - if (!(obj-base.read_domains I915_GEM_DOMAIN_CPU)) { - /* If we're not in the cpu read domain, set ourself into the gtt -* read domain and manually flush cachelines (if required). This -* optimizes for the case when the gpu will dirty the data -* anyway again before the next pread happens. */ - needs_clflush = !cpu_cache_is_coherent(dev, obj-cache_level); - ret = i915_gem_object_wait_rendering(obj, true); - if (ret) - return ret; - } - - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_obj_prepare_shmem_read(obj, needs_clflush); if (ret) return ret; - i915_gem_object_pin_pages(obj); - offset = args-offset; for_each_sg_page(obj-pages-sgl, sg_iter, obj-pages-nents, -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 08/13] drm/i915: Enable register whitelist checks
From: Brad Volkin bradley.d.vol...@intel.com MI_STORE_REGISTER_MEM, MI_LOAD_REGISTER_MEM, and MI_LOAD_REGISTER_IMM commands allow userspace access to registers. Only certain registers should be allowed for such access, so enable checking for those commands. Each ring gets its own register whitelist. MI_LOAD_REGISTER_REG on HSW also allows register access but is currently unused by userspace components. Leave it rejected. PIPE_CONTROL and MEDIA_VFE_STATE allow register access based on certain bits being set. Reject those as well. v2: trailing commas, rebased OTC-Tracker: AXIA-4631 Change-Id: Ie614a2f0eb2e5917de809e5a17957175d24cc44f Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 21 ++--- drivers/gpu/drm/i915/i915_reg.h| 3 +++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 353e5cf..4f14a24 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -122,9 +122,12 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_SUSPEND_FLUSH, SMI,F, 1, S ), CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), - CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, R ), - CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, R ), - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, R ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007C } ), + CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007C } ), + CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007C } ), CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), }; @@ -141,9 +144,21 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), CMD( GFX_OP_3DSTATE_VF_STATISTICS, S3D,F, 1, S ), CMD( PIPELINE_SELECT, S3D,F, 1, S ), + CMD( MEDIA_VFE_STATE, 
S3D, !F, 0x, B, + .bits = {{ + .offset = 2, + .mask = MEDIA_VFE_STATE_MMIO_ACCESS_MASK, + .expected = 0, + }}, ), CMD( GPGPU_OBJECT, S3D, !F, 0xFF, S ), CMD( GPGPU_WALKER, S3D, !F, 0xFF, S ), CMD( GFX_OP_3DSTATE_SO_DECL_LIST, S3D, !F, 0x1FF, S ), + CMD( GFX_OP_PIPE_CONTROL(5), S3D, !F, 0xFF, B, + .bits = {{ + .offset = 1, + .mask = PIPE_CONTROL_MMIO_WRITE, + .expected = 0, + }}, ), }; static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 87523df..11cca96 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -330,6 +330,7 @@ #define DISPLAY_PLANE_B (120) #define GFX_OP_PIPE_CONTROL(len) ((0x329)|(0x327)|(0x224)|(len-2)) #define PIPE_CONTROL_GLOBAL_GTT_IVB (124) /* gen7+ */ +#define PIPE_CONTROL_MMIO_WRITE (123) #define PIPE_CONTROL_CS_STALL(120) #define PIPE_CONTROL_TLB_INVALIDATE (118) #define PIPE_CONTROL_QW_WRITE(114) @@ -370,6 +371,8 @@ #define PIPELINE_SELECT ((0x329)|(0x127)|(0x124)|(0x416)) #define GFX_OP_3DSTATE_VF_STATISTICS ((0x329)|(0x127)|(0x024)|(0xB16)) +#define MEDIA_VFE_STATE ((0x329)|(0x227)|(0x024)|(0x016)) +#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) #define GPGPU_OBJECT ((0x329)|(0x227)|(0x124)|(0x416)) #define GPGPU_WALKER ((0x329)|(0x227)|(0x124)|(0x516)) #define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 11/13] drm/i915: Reject commands that would store to global HWS page
From: Brad Volkin bradley.d.vol...@intel.com PIPE_CONTROL and MI_FLUSH_DW have bits that would write to the hardware status page. The driver stores request tracking info there, so don't let userspace overwrite it. v2: trailing comma fix, rebased Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 24 +++- drivers/gpu/drm/i915/i915_reg.h| 1 + 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 1528549..f9aa01a 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -193,7 +193,8 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { }, { .offset = 1, - .mask = PIPE_CONTROL_GLOBAL_GTT_IVB, + .mask = (PIPE_CONTROL_GLOBAL_GTT_IVB | +PIPE_CONTROL_STORE_DATA_INDEX), .expected = 0, .condition_offset = 1, .condition_mask = PIPE_CONTROL_POST_SYNC_OP_MASK, @@ -242,6 +243,13 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { .expected = 0, .condition_offset = 0, .condition_mask = MI_FLUSH_DW_OP_MASK, + }, + { + .offset = 0, + .mask = MI_FLUSH_DW_STORE_INDEX, + .expected = 0, + .condition_offset = 0, + .condition_mask = MI_FLUSH_DW_OP_MASK, }}, ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, B, .bits = {{ @@ -278,6 +286,13 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { .expected = 0, .condition_offset = 0, .condition_mask = MI_FLUSH_DW_OP_MASK, + }, + { + .offset = 0, + .mask = MI_FLUSH_DW_STORE_INDEX, + .expected = 0, + .condition_offset = 0, + .condition_mask = MI_FLUSH_DW_OP_MASK, }}, ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, B, .bits = {{ @@ -308,6 +323,13 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { .expected = 0, .condition_offset = 0, .condition_mask = MI_FLUSH_DW_OP_MASK, + }, + { + .offset = 0, + .mask = MI_FLUSH_DW_STORE_INDEX, + .expected = 0, + .condition_offset = 0, + .condition_mask = 
MI_FLUSH_DW_OP_MASK, }}, ), CMD( COLOR_BLT,S2D, !F, 0x3F, S ), CMD( SRC_COPY_BLT, S2D, !F, 0x3F, S ), diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e683b31..46db649 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -335,6 +335,7 @@ #define GFX_OP_PIPE_CONTROL(len) ((0x329)|(0x327)|(0x224)|(len-2)) #define PIPE_CONTROL_GLOBAL_GTT_IVB (124) /* gen7+ */ #define PIPE_CONTROL_MMIO_WRITE (123) +#define PIPE_CONTROL_STORE_DATA_INDEX(121) #define PIPE_CONTROL_CS_STALL(120) #define PIPE_CONTROL_TLB_INVALIDATE (118) #define PIPE_CONTROL_QW_WRITE(114) -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 12/13] drm/i915: Add a CMD_PARSER_VERSION getparam
From: Brad Volkin bradley.d.vol...@intel.com So userspace can query the kernel for command parser support. v2: Add i915_cmd_parser_get_version(), history log, and kerneldoc OTC-Tracker: AXIA-4631 Change-Id: I58af650db9f6753c2dcac9c54ab432fd31db302f Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 19 +++ drivers/gpu/drm/i915/i915_dma.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h| 1 + include/uapi/drm/i915_drm.h| 1 + 4 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index f9aa01a..23c8174 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -897,3 +897,22 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, return ret; } + +/** + * i915_cmd_parser_get_version() - get the cmd parser version number + * + * The cmd parser maintains a simple increasing integer version number suitable + * for passing to userspace clients to determine what operations are permitted. + * + * Return: the current version number of the cmd parser + */ +int i915_cmd_parser_get_version(void) +{ + /* +* Command parser version history +* +* 1. Initial version. Checks batches and reports violations, but leaves +*hardware parsing enabled (so does not allow new use cases). 
+*/ + return 1; +} diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 7688abc..14875f5 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1017,6 +1017,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: value = 1; break; + case I915_PARAM_CMD_PARSER_VERSION: + value = i915_cmd_parser_get_version(); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 27a48d9..6294d61 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2582,6 +2582,7 @@ void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone); const char *i915_cache_level_str(int type); /* i915_cmd_parser.c */ +int i915_cmd_parser_get_version(void); void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring); bool i915_needs_cmd_parser(struct intel_ring_buffer *ring); int i915_parse_cmds(struct intel_ring_buffer *ring, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 126bfaa..8a3e4ef00 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_NO_RELOC 25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 #define I915_PARAM_HAS_WT 27 +#define I915_PARAM_CMD_PARSER_VERSION 28 typedef struct drm_i915_getparam { int param; -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 04/13] drm/i915: Reject privileged commands
From: Brad Volkin bradley.d.vol...@intel.com The spec defines most of these commands as privileged. A few others, like the semaphore mbox command and some display commands, are also reserved for the driver's use. Subsequent patches relax some of these restrictions. v2: Rebased Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 54 -- drivers/gpu/drm/i915/i915_reg.h| 1 + 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 12241e8..90bbb6d 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -116,27 +116,27 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_NOOP, SMI,F, 1, S ), CMD( MI_USER_INTERRUPT,SMI,F, 1, S ), - CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, S ), + CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, R ), CMD( MI_ARB_CHECK, SMI,F, 1, S ), CMD( MI_REPORT_HEAD, SMI,F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI,F, 1, S ), - CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, S ), - CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, S ), - CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, S ), - CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, S ), - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, S ), + CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, R ), + CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, R ), + CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, R ), + CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, R ), CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), }; static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( MI_FLUSH, SMI,F, 1, S ), - CMD( MI_ARB_ON_OFF,SMI,F, 1, S ), + CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), CMD( MI_PREDICATE, SMI,F, 1, S ), CMD( MI_TOPOLOGY_FILTER, SMI,F, 1, S ), - CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, S ), - CMD( MI_SET_CONTEXT, SMI, !F, 0xFF, S ), + CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), + CMD( MI_SET_CONTEXT, SMI, !F, 0xFF, R ), CMD( MI_URB_CLEAR, 
SMI, !F, 0xFF, S ), - CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, S ), + CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, R ), CMD( MI_CLFLUSH, SMI, !F, 0x3FF, S ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), CMD( GFX_OP_3DSTATE_VF_STATISTICS, S3D,F, 1, S ), @@ -151,7 +151,9 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_RS_CONTROL,SMI,F, 1, S ), CMD( MI_URB_ATOMIC_ALLOC, SMI,F, 1, S ), CMD( MI_RS_CONTEXT,SMI,F, 1, S ), - CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, S ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, R ), CMD( MI_RS_STORE_DATA_IMM, SMI, !F, 0xFF, S ), CMD( MI_LOAD_URB_MEM, SMI, !F, 0xFF, S ), CMD( MI_STORE_URB_MEM, SMI, !F, 0xFF, S ), @@ -166,8 +168,9 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { }; static const struct drm_i915_cmd_descriptor video_cmds[] = { - CMD( MI_ARB_ON_OFF,SMI,F, 1, S ), + CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, S ), + CMD( MI_UPDATE_GTT,SMI, !F, 0x3F, R ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), /* * MFX_WAIT doesn't fit the way we handle length for most commands. @@ -178,18 +181,25 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { }; static const struct drm_i915_cmd_descriptor vecs_cmds[] = { - CMD( MI_ARB_ON_OFF,SMI,F, 1, S ), +
[Intel-gfx] [PATCH 5/6] tests/gem_exec_parse: Test for batches w/o MI_BATCH_BUFFER_END
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index 9e90408..004c3bf 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -257,6 +257,15 @@ igt_main -EINVAL)); } + igt_subtest("batch-without-end") { + uint32_t noop[1024] = { 0 }; + igt_assert( + exec_batch(fd, handle, + noop, sizeof(noop), + I915_EXEC_RENDER, + -EINVAL)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 00/13] Gen7 batch buffer command parser
From: Brad Volkin bradley.d.vol...@intel.com Certain OpenGL features (e.g. transform feedback, performance monitoring) require userspace code to submit batches containing commands such as MI_LOAD_REGISTER_IMM to access various registers. Unfortunately, some generations of the hardware will noop these commands in unsecure batches (which includes all userspace batches submitted via i915) even though the commands may be safe and represent the intended programming model of the device. This series introduces a software command parser similar in operation to the command parsing done in hardware for unsecure batches. However, the software parser allows some operations that would be noop'd by hardware, if the parser determines the operation is safe, and submits the batch as secure to prevent hardware parsing. Currently the series implements this on IVB and HSW. The series has one piece of prep work, one patch for the parser logic, and a handful of patches to fill out the tables which drive the parser. There are follow-up patches to libdrm and to i-g-t. The i-g-t tests are basic and do not test all of the commands used by the parser on the assumption that I'm likely to make the same mistakes in both the parser and the test. WARNING!!! I've previously run the i-g-t gem_* tests, the piglit quick tests, and generally used Ubuntu 13.10 IVB and HSW systems with the parser running. Aside from a failure described below, I did not see any regressions. However, the series currently hits a BUG_ON() if you enable the parser due to a regression in secure batch handling on -nightly. At this point there are a couple of required/potential improvements. 1) Chained batches. The parser currently allows MI_BATCH_BUFFER_START commands in userspace batches without parsing them. The media driver uses chained batches, so a solution is required. I'm still working through the requirements but don't want to continue delaying the review process for what I have so far. 2) Command buffer copy. 
To avoid CPU modifications to buffers after parsing, and to avoid GPU modifications to buffers via EUs or commands in the batch, we should copy the userspace batch buffer to memory that userspace does not have access to, map it into GGTT, and execute that batch buffer. I have a sense of how to do this for 1st-level batches, but it may need changes to tie in with the chained batch parsing, so I've again held off. 3) Coherency. I've found a coherency issue on VLV when reading the batch buffer from the CPU during execbuffer2. Userspace writes the batch via pwrite fast path before calling execbuffer2. The parser reads stale data. This works fine on IVB and HSW, so I believe it's an LLC vs. non-LLC issue. I'm just unclear on what the correct flushing or synchronization is for this scenario. This only matters if we get PPGTT working on VLV and enable the parser there. v2: - Significantly reorder series - Scan secure batches (i.e. I915_EXEC_SECURE) - Check that parser tables are sorted during init - Fixed gem_cpu_reloc regression - HAS_CMD_PARSER - CMD_PARSER_VERSION getparam - Additional tests Brad Volkin (13): drm/i915: Refactor shmem pread setup drm/i915: Implement command buffer parsing logic drm/i915: Initial command parser table definitions drm/i915: Reject privileged commands drm/i915: Allow some privileged commands from master drm/i915: Add register whitelists for mesa drm/i915: Add register whitelist for DRM master drm/i915: Enable register whitelist checks drm/i915: Reject commands that explicitly generate interrupts drm/i915: Enable PPGTT command parser checks drm/i915: Reject commands that would store to global HWS page drm/i915: Add a CMD_PARSER_VERSION getparam drm/i915: Enable command parsing by default drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_cmd_parser.c | 845 + drivers/gpu/drm/i915/i915_dma.c| 4 + drivers/gpu/drm/i915/i915_drv.h| 103 drivers/gpu/drm/i915/i915_gem.c| 48 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 17 + 
drivers/gpu/drm/i915/i915_params.c | 5 + drivers/gpu/drm/i915/i915_reg.h| 78 +++ drivers/gpu/drm/i915/intel_ringbuffer.c| 2 + drivers/gpu/drm/i915/intel_ringbuffer.h| 32 ++ include/uapi/drm/i915_drm.h| 1 + 11 files changed, 1123 insertions(+), 15 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_cmd_parser.c -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 07/13] drm/i915: Add register whitelist for DRM master
From: Brad Volkin bradley.d.vol...@intel.com These are used to implement scanline waits in the X server. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 30 ++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 18d5b05..296e322 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -234,6 +234,20 @@ static const u32 gen7_blt_regs[] = { BCS_SWCTRL, }; +/* Whitelists for the DRM master. Magic numbers are taken from sna, to match. */ +static const u32 ivb_master_regs[] = { + 0xa188, /* FORCEWAKE_MT */ + 0x44050, /* DERRMR */ + 0x70068, + 0x71068, + 0x72068, +}; + +static const u32 hsw_master_regs[] = { + 0xa188, /* FORCEWAKE_MT */ + 0x44050, /* DERRMR */ +}; + #define CLIENT_MASK 0xE000 #define SUBCLIENT_MASK 0x1800 #define MI_CLIENT0x @@ -365,6 +379,14 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) ring-reg_table = gen7_render_regs; ring-reg_count = ARRAY_SIZE(gen7_render_regs); + if (IS_HASWELL(ring-dev)) { + ring-master_reg_table = hsw_master_regs; + ring-master_reg_count = ARRAY_SIZE(hsw_master_regs); + } else { + ring-master_reg_table = ivb_master_regs; + ring-master_reg_count = ARRAY_SIZE(ivb_master_regs); + } + ring-get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VCS: @@ -384,6 +406,14 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) ring-reg_table = gen7_blt_regs; ring-reg_count = ARRAY_SIZE(gen7_blt_regs); + if (IS_HASWELL(ring-dev)) { + ring-master_reg_table = hsw_master_regs; + ring-master_reg_count = ARRAY_SIZE(hsw_master_regs); + } else { + ring-master_reg_table = ivb_master_regs; + ring-master_reg_count = ARRAY_SIZE(ivb_master_regs); + } + ring-get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VECS: -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 06/13] drm/i915: Add register whitelists for mesa
From: Brad Volkin bradley.d.vol...@intel.com These registers are currently used by mesa for blitting, transform feedback extensions, and performance monitoring extensions. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 55 ++ drivers/gpu/drm/i915/i915_reg.h| 20 + 2 files changed, 75 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 88456638..18d5b05 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -185,6 +185,55 @@ static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, }; +/* + * Register whitelists, sorted by increasing register offset. + * + * Some registers that userspace accesses are 64 bits. The register + * access commands only allow 32-bit accesses. Hence, we have to include + * entries for both halves of the 64-bit registers. + */ + +static const u32 gen7_render_regs[] = { + HS_INVOCATION_COUNT, + HS_INVOCATION_COUNT + sizeof(u32), + DS_INVOCATION_COUNT, + DS_INVOCATION_COUNT + sizeof(u32), + IA_VERTICES_COUNT, + IA_VERTICES_COUNT + sizeof(u32), + IA_PRIMITIVES_COUNT, + IA_PRIMITIVES_COUNT + sizeof(u32), + VS_INVOCATION_COUNT, + VS_INVOCATION_COUNT + sizeof(u32), + GS_INVOCATION_COUNT, + GS_INVOCATION_COUNT + sizeof(u32), + GS_PRIMITIVES_COUNT, + GS_PRIMITIVES_COUNT + sizeof(u32), + CL_INVOCATION_COUNT, + CL_INVOCATION_COUNT + sizeof(u32), + CL_PRIMITIVES_COUNT, + CL_PRIMITIVES_COUNT + sizeof(u32), + PS_INVOCATION_COUNT, + PS_INVOCATION_COUNT + sizeof(u32), + PS_DEPTH_COUNT, + PS_DEPTH_COUNT + sizeof(u32), + GEN7_SO_NUM_PRIMS_WRITTEN(0), + GEN7_SO_NUM_PRIMS_WRITTEN(0) + sizeof(u32), + GEN7_SO_NUM_PRIMS_WRITTEN(1), + GEN7_SO_NUM_PRIMS_WRITTEN(1) + sizeof(u32), + GEN7_SO_NUM_PRIMS_WRITTEN(2), + GEN7_SO_NUM_PRIMS_WRITTEN(2) + sizeof(u32), + GEN7_SO_NUM_PRIMS_WRITTEN(3), + GEN7_SO_NUM_PRIMS_WRITTEN(3) + sizeof(u32), + GEN7_SO_WRITE_OFFSET(0), + 
GEN7_SO_WRITE_OFFSET(1), + GEN7_SO_WRITE_OFFSET(2), + GEN7_SO_WRITE_OFFSET(3), +}; + +static const u32 gen7_blt_regs[] = { + BCS_SWCTRL, +}; + #define CLIENT_MASK 0xE000 #define SUBCLIENT_MASK 0x1800 #define MI_CLIENT0x @@ -313,6 +362,9 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) ring-cmd_table_count = ARRAY_SIZE(gen7_render_cmds); } + ring-reg_table = gen7_render_regs; + ring-reg_count = ARRAY_SIZE(gen7_render_regs); + ring-get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VCS: @@ -329,6 +381,9 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) ring-cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); } + ring-reg_table = gen7_blt_regs; + ring-reg_count = ARRAY_SIZE(gen7_blt_regs); + ring-get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VECS: diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2b7c26e..b99bacf 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -385,6 +385,26 @@ #define SRC_COPY_BLT ((0x229)|(0x4322)) /* + * Registers used only by the command parser + */ +#define BCS_SWCTRL 0x22200 + +#define HS_INVOCATION_COUNT 0x2300 +#define DS_INVOCATION_COUNT 0x2308 +#define IA_VERTICES_COUNT 0x2310 +#define IA_PRIMITIVES_COUNT 0x2318 +#define VS_INVOCATION_COUNT 0x2320 +#define GS_INVOCATION_COUNT 0x2328 +#define GS_PRIMITIVES_COUNT 0x2330 +#define CL_INVOCATION_COUNT 0x2338 +#define CL_PRIMITIVES_COUNT 0x2340 +#define PS_INVOCATION_COUNT 0x2348 +#define PS_DEPTH_COUNT 0x2350 + +/* There are the 4 64-bit counter registers, one for each stream output */ +#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8) + +/* * Reset registers */ #define DEBUG_RESET_I830 0x6070 -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 04/13] drm/i915: Reject privileged commands
From: Brad Volkin bradley.d.vol...@intel.com The spec defines most of these commands as privileged. A few others, like the semaphore mbox command and some display commands, are also reserved for the driver's use. Subsequent patches relax some of these restrictions. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 54 -- drivers/gpu/drm/i915/i915_reg.h| 31 +-- 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 2e27bad..cc2f68c 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -57,27 +57,27 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_NOOP, SMI,F, 1, S ), CMD( MI_USER_INTERRUPT,SMI,F, 1, S ), - CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, S ), + CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, R ), CMD( MI_ARB_CHECK, SMI,F, 1, S ), CMD( MI_REPORT_HEAD, SMI,F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI,F, 1, S ), - CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, S ), - CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, S ), - CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, S ), - CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, S ), - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, S ), + CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, R ), + CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, R ), + CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, R ), + CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, R ), CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), }; static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( MI_FLUSH, SMI,F, 1, S ), - CMD( MI_ARB_ON_OFF,SMI,F, 1, S ), + CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), CMD( MI_PREDICATE, SMI,F, 1, S ), CMD( MI_TOPOLOGY_FILTER, SMI,F, 1, S ), - CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, S ), - CMD( MI_SET_CONTEXT, SMI, !F, 0xFF, S ), + CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), + CMD( MI_SET_CONTEXT, SMI, !F, 0xFF, R ), CMD( MI_URB_CLEAR, SMI, !F, 
0xFF, S ), - CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, S ), + CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, R ), CMD( MI_CLFLUSH, SMI, !F, 0x3FF, S ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), CMD( GFX_OP_3DSTATE_VF_STATISTICS, S3D,F, 1, S ), @@ -92,7 +92,9 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_RS_CONTROL,SMI,F, 1, S ), CMD( MI_URB_ATOMIC_ALLOC, SMI,F, 1, S ), CMD( MI_RS_CONTEXT,SMI,F, 1, S ), - CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, S ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, R ), CMD( MI_RS_STORE_DATA_IMM, SMI, !F, 0xFF, S ), CMD( MI_LOAD_URB_MEM, SMI, !F, 0xFF, S ), CMD( MI_STORE_URB_MEM, SMI, !F, 0xFF, S ), @@ -107,8 +109,9 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { }; static const struct drm_i915_cmd_descriptor video_cmds[] = { - CMD( MI_ARB_ON_OFF,SMI,F, 1, S ), + CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, S ), + CMD( MI_UPDATE_GTT,SMI, !F, 0x3F, R ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), /* * MFX_WAIT doesn't fit the way we handle length for most commands. @@ -119,18 +122,25 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { }; static const struct drm_i915_cmd_descriptor vecs_cmds[] = { - CMD( MI_ARB_ON_OFF,SMI,F, 1, S ), +
[Intel-gfx] [PATCH 09/13] drm/i915: Reject commands that explicitly generate interrupts
From: Brad Volkin bradley.d.vol...@intel.com The driver leaves most interrupts masked during normal operation, so there would have to be additional work to enable userspace to safely request/receive an interrupt. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 25 +++-- drivers/gpu/drm/i915/i915_reg.h| 1 + 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 5d3e303..7de7c6a 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -56,7 +56,7 @@ -- */ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_NOOP, SMI,F, 1, S ), - CMD( MI_USER_INTERRUPT,SMI,F, 1, S ), + CMD( MI_USER_INTERRUPT,SMI,F, 1, R ), CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, M ), CMD( MI_ARB_CHECK, SMI,F, 1, S ), CMD( MI_REPORT_HEAD, SMI,F, 1, S ), @@ -98,7 +98,7 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( GFX_OP_PIPE_CONTROL(5), S3D, !F, 0xFF, B, .bits = {{ .offset = 1, - .mask = PIPE_CONTROL_MMIO_WRITE, + .mask = (PIPE_CONTROL_MMIO_WRITE | PIPE_CONTROL_NOTIFY), .expected = 0 }}, .bits_count = 1 ), @@ -129,6 +129,13 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, S ), CMD( MI_UPDATE_GTT,SMI, !F, 0x3F, R ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, B, + .bits = {{ + .offset = 0, + .mask = MI_FLUSH_DW_NOTIFY, + .expected = 0 + }}, + .bits_count = 1 ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), /* * MFX_WAIT doesn't fit the way we handle length for most commands. 
@@ -142,6 +149,13 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, S ), CMD( MI_UPDATE_GTT,SMI, !F, 0x3F, R ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, B, + .bits = {{ + .offset = 0, + .mask = MI_FLUSH_DW_NOTIFY, + .expected = 0 + }}, + .bits_count = 1 ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), }; @@ -149,6 +163,13 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), CMD( MI_UPDATE_GTT,SMI, !F, 0x3F, R ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, B, + .bits = {{ + .offset = 0, + .mask = MI_FLUSH_DW_NOTIFY, + .expected = 0 + }}, + .bits_count = 1 ), CMD( COLOR_BLT,S2D, !F, 0x3F, S ), CMD( SRC_COPY_BLT, S2D, !F, 0x3F, S ), }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6592d0d..c2e4898 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -258,6 +258,7 @@ #define MI_FLUSH_DW_STORE_INDEX (121) #define MI_INVALIDATE_TLB(118) #define MI_FLUSH_DW_OP_STOREDW (114) +#define MI_FLUSH_DW_NOTIFY (18) #define MI_INVALIDATE_BSD(17) #define MI_FLUSH_DW_USE_GTT (12) #define MI_FLUSH_DW_USE_PPGTT(02) -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 13/13] drm/i915: Enable command parsing by default
From: Brad Volkin bradley.d.vol...@intel.com OTC-Tracker: AXIA-4631 Change-Id: I6747457e1fe7494bd42787af51198fcba398ad78 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 6d3d906..981b635 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -47,7 +47,7 @@ struct i915_params i915 __read_mostly = { .prefault_disable = 0, .reset = true, .invert_brightness = 0, - .enable_cmd_parser = 0 + .enable_cmd_parser = 1 }; module_param_named(modeset, i915.modeset, int, 0400); @@ -157,4 +157,4 @@ MODULE_PARM_DESC(invert_brightness, module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600); MODULE_PARM_DESC(enable_cmd_parser, - "Enable command parsing (default: false)"); + "Enable command parsing (default: true)"); -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 12/13] drm/i915: Add a CMD_PARSER_VERSION getparam
From: Brad Volkin bradley.d.vol...@intel.com So userspace can query the kernel for command parser support. OTC-Tracker: AXIA-4631 Change-Id: I58af650db9f6753c2dcac9c54ab432fd31db302f Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_dma.c | 4 ++++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 258b1be..34ba199 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1013,6 +1013,10 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: value = 1; break; + case I915_PARAM_CMD_PARSER_VERSION: + /* TODO: version info (e.g. what is allowed?) */ + value = 1; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 126bfaa..8a3e4ef00 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_NO_RELOC 25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 #define I915_PARAM_HAS_WT 27 +#define I915_PARAM_CMD_PARSER_VERSION 28 typedef struct drm_i915_getparam { int param; -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 01/13] drm/i915: Refactor shmem pread setup
From: Brad Volkin bradley.d.vol...@intel.com The command parser is going to need the same synchronization and setup logic, so factor it out for reuse. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_gem.c | 48 + 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3673ba1..bfb30df 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2045,6 +2045,9 @@ void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); void i915_gem_lastclose(struct drm_device *dev); +int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, + int *needs_clflush); + int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 39770f7..fdc1f40 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -332,6 +332,39 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, return 0; } +/* + * Pins the specified object's pages and synchronizes the object with + * GPU accesses. Sets needs_clflush to non-zero if the caller should + * flush the object from the CPU cache. + */ +int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, + int *needs_clflush) +{ + int ret; + + *needs_clflush = 0; + + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { + /* If we're not in the cpu read domain, set ourself into the gtt +* read domain and manually flush cachelines (if required). This +* optimizes for the case when the gpu will dirty the data +* anyway again before the next pread happens. 
*/ + *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, + obj->cache_level); + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; + } + + ret = i915_gem_object_get_pages(obj); + if (ret) + return ret; + + i915_gem_object_pin_pages(obj); + + return ret; +} + /* Per-page copy function for the shmem pread fastpath. * Flushes invalid cachelines before reading the target if * needs_clflush is set. */ @@ -429,23 +462,10 @@ i915_gem_shmem_pread(struct drm_device *dev, obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { - /* If we're not in the cpu read domain, set ourself into the gtt -* read domain and manually flush cachelines (if required). This -* optimizes for the case when the gpu will dirty the data -* anyway again before the next pread happens. */ - needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); - ret = i915_gem_object_wait_rendering(obj, true); - if (ret) - return ret; - } - - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); if (ret) return ret; - i915_gem_object_pin_pages(obj); - offset = args->offset; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 03/13] drm/i915: Initial command parser table definitions
From: Brad Volkin bradley.d.vol...@intel.com Add command tables defining irregular length commands for each ring. This requires a few new command opcode definitions. OTC-Tracker: AXIA-4631 Change-Id: I064bceb457e15f46928058352afe76d918c58ef5 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 157 + drivers/gpu/drm/i915/i915_reg.h| 46 ++ 2 files changed, 203 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 7639dbc..2e27bad 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -27,6 +27,148 @@ #include i915_drv.h +#define STD_MI_OPCODE_MASK 0xFF80 +#define STD_3D_OPCODE_MASK 0x +#define STD_2D_OPCODE_MASK 0xFFC0 +#define STD_MFX_OPCODE_MASK 0x + +#define CMD(op, opm, f, lm, fl, ...) \ + { \ + .flags = (fl) | (f),\ + .cmd = { (op), (opm) }, \ + .length = { (lm) }, \ + __VA_ARGS__ \ + } + +/* Convenience macros to compress the tables */ +#define SMI STD_MI_OPCODE_MASK +#define S3D STD_3D_OPCODE_MASK +#define S2D STD_2D_OPCODE_MASK +#define SMFX STD_MFX_OPCODE_MASK +#define F CMD_DESC_FIXED +#define S CMD_DESC_SKIP +#define R CMD_DESC_REJECT +#define W CMD_DESC_REGISTER +#define B CMD_DESC_BITMASK +#define M CMD_DESC_MASTER + +/*Command Mask Fixed Len Action + -- */ +static const struct drm_i915_cmd_descriptor common_cmds[] = { + CMD( MI_NOOP, SMI,F, 1, S ), + CMD( MI_USER_INTERRUPT,SMI,F, 1, S ), + CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, S ), + CMD( MI_ARB_CHECK, SMI,F, 1, S ), + CMD( MI_REPORT_HEAD, SMI,F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI,F, 1, S ), + CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, S ), + CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, S ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, S ), + CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, S ), + CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, S ), + CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), +}; + +static const struct drm_i915_cmd_descriptor render_cmds[] = { + CMD( 
MI_FLUSH, SMI,F, 1, S ), + CMD( MI_ARB_ON_OFF,SMI,F, 1, S ), + CMD( MI_PREDICATE, SMI,F, 1, S ), + CMD( MI_TOPOLOGY_FILTER, SMI,F, 1, S ), + CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, S ), + CMD( MI_SET_CONTEXT, SMI, !F, 0xFF, S ), + CMD( MI_URB_CLEAR, SMI, !F, 0xFF, S ), + CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, S ), + CMD( MI_CLFLUSH, SMI, !F, 0x3FF, S ), + CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), + CMD( GFX_OP_3DSTATE_VF_STATISTICS, S3D,F, 1, S ), + CMD( PIPELINE_SELECT, S3D,F, 1, S ), + CMD( GPGPU_OBJECT, S3D, !F, 0xFF, S ), + CMD( GPGPU_WALKER, S3D, !F, 0xFF, S ), + CMD( GFX_OP_3DSTATE_SO_DECL_LIST, S3D, !F, 0x1FF, S ), +}; + +static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { + CMD( MI_SET_PREDICATE, SMI,F, 1, S ), + CMD( MI_RS_CONTROL,SMI,F, 1, S ), + CMD( MI_URB_ATOMIC_ALLOC, SMI,F, 1, S ), + CMD( MI_RS_CONTEXT,SMI,F, 1, S ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, S ), + CMD( MI_RS_STORE_DATA_IMM, SMI, !F, 0xFF, S ), + CMD( MI_LOAD_URB_MEM, SMI, !F, 0xFF, S ), + CMD( MI_STORE_URB_MEM, SMI, !F, 0xFF, S ), + CMD( GFX_OP_3DSTATE_DX9_CONSTANTF_VS, S3D, !F, 0x7FF, S ), + CMD( GFX_OP_3DSTATE_DX9_CONSTANTF_PS, S3D, !F, 0x7FF, S ), + + CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS, S3D, !F, 0x1FF, S ), + CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS, S3D, !F, 0x1FF, S ), + CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS, S3D, !F, 0x1FF, S ), + CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS, S3D, !F, 0x1FF, S ), + CMD(
[Intel-gfx] [PATCH 02/13] drm/i915: Implement command buffer parsing logic
From: Brad Volkin bradley.d.vol...@intel.com The command parser scans batch buffers submitted via execbuffer ioctls before the driver submits them to hardware. At a high level, it looks for several things: 1) Commands which are explicitly defined as privileged or which should only be used by the kernel driver. The parser generally rejects such commands, with the provision that it may allow some from the drm master process. 2) Commands which access registers. To support correct/enhanced userspace functionality, particularly certain OpenGL extensions, the parser provides a whitelist of registers which userspace may safely access (for both normal and drm master processes). 3) Commands which access privileged memory (i.e. GGTT, HWS page, etc.). The parser always rejects such commands. Each ring maintains tables of commands and registers which the parser uses in scanning batch buffers submitted to that ring. The set of commands that the parser must check for is significantly smaller than the number of commands supported, especially on the render ring. As such, the parser tables (built up in subsequent patches) contain only those commands required by the parser. This generally works because command opcode ranges have standard command length encodings. So for commands that the parser does not need to check, it can easily skip them. This is implemented via a per-ring length decoding vfunc. Unfortunately, there are a number of commands that do not follow the standard length encoding for their opcode range, primarily amongst the MI_* commands. To handle this, the parser provides a way to define explicit skip entries in the per-ring command tables. Other command table entries will map fairly directly to high level categories mentioned above: rejected, master-only, register whitelist. A number of checks, including the privileged memory checks, are implemented via a general bitmasking mechanism. 
OTC-Tracker: AXIA-4631 Change-Id: I50b98c71c6655893291c78a2d1b8954577b37a30 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_cmd_parser.c | 404 + drivers/gpu/drm/i915/i915_drv.h| 94 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 17 ++ drivers/gpu/drm/i915/i915_params.c | 5 + drivers/gpu/drm/i915/intel_ringbuffer.c| 2 + drivers/gpu/drm/i915/intel_ringbuffer.h| 32 +++ 7 files changed, 556 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/i915_cmd_parser.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4850494..2da81bf 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -47,7 +47,8 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \ dvo_tfp410.o \ dvo_sil164.o \ dvo_ns2501.o \ - i915_gem_dmabuf.o + i915_gem_dmabuf.o \ + i915_cmd_parser.o i915-$(CONFIG_COMPAT) += i915_ioc32.o diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c new file mode 100644 index 000..7639dbc --- /dev/null +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -0,0 +1,404 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Brad Volkin bradley.d.vol...@intel.com + * + */ + +#include i915_drv.h + +#define CLIENT_MASK 0xE000 +#define SUBCLIENT_MASK 0x1800 +#define MI_CLIENT0x +#define RC_CLIENT0x6000 +#define BC_CLIENT0x4000 +#define MEDIA_SUBCLIENT 0x1000 + +static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = cmd_header CLIENT_MASK; + u32 subclient = cmd_header SUBCLIENT_MASK; + + if (client == MI_CLIENT) + return 0x3F; + else if (client ==
[Intel-gfx] [PATCH 05/13] drm/i915: Allow some privileged commands from master
From: Brad Volkin bradley.d.vol...@intel.com The Intel DDX uses these to implement scanline waits in the X server. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index cc2f68c..88456638 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -57,7 +57,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_NOOP, SMI,F, 1, S ), CMD( MI_USER_INTERRUPT,SMI,F, 1, S ), - CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, R ), + CMD( MI_WAIT_FOR_EVENT,SMI,F, 1, M ), CMD( MI_ARB_CHECK, SMI,F, 1, S ), CMD( MI_REPORT_HEAD, SMI,F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI,F, 1, S ), @@ -92,7 +92,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_RS_CONTROL,SMI,F, 1, S ), CMD( MI_URB_ATOMIC_ALLOC, SMI,F, 1, S ), CMD( MI_RS_CONTEXT,SMI,F, 1, S ), - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, R ), CMD( MI_RS_STORE_DATA_IMM, SMI, !F, 0xFF, S ), @@ -137,7 +137,7 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { }; static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 10/13] drm/i915: Enable PPGTT command parser checks
From: Brad Volkin bradley.d.vol...@intel.com Various commands that access memory have a bit to determine whether the graphics address specified in the command should use the GGTT or PPGTT for translation. These checks ensure that the bit indicates PPGTT translation. Most of these checks use the existing bit-checking infrastructure. The PIPE_CONTROL and MI_FLUSH_DW commands, however, are multi-function commands. The GGTT/PPGTT bit is only relevant for certain uses of the command. As such, this change also extends the bit-checking code to include a condition mask and offset. If the condition mask is non-zero then the parser only performs the bit check when the bits specified by the condition mask/offset are also non-zero. NOTE: At this point in the series PPGTT must be enabled for the parser to work correctly. If it's not enabled, userspace will not be setting the PPGTT bits the way the parser requires. VLV is the only platform where this is a problem, so at this point, we disable parsing for VLV. 
OTC-Tracker: AXIA-4631 Change-Id: I3f4c76b6734f1956ec47e698230f97d0998ff92b Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 147 + drivers/gpu/drm/i915/i915_drv.h| 6 ++ drivers/gpu/drm/i915/i915_reg.h| 6 ++ 3 files changed, 144 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 7de7c6a..26072a2 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -65,10 +65,22 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007C } ), - CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W, - .reg = { .offset = 1, .mask = 0x007C } ), - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W, - .reg = { .offset = 1, .mask = 0x007C } ), + CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W | B, + .reg = { .offset = 1, .mask = 0x007C }, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0 + }}, + .bits_count = 1 ), + CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B, + .reg = { .offset = 1, .mask = 0x007C }, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0 + }}, + .bits_count = 1 ), CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), }; @@ -80,9 +92,35 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_SET_CONTEXT, SMI, !F, 0xFF, R ), CMD( MI_URB_CLEAR, SMI, !F, 0xFF, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3F, B, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0 + }}, + .bits_count = 1 ), CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, R ), - CMD( MI_CLFLUSH, SMI, !F, 0x3FF, S ), - CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), + CMD( MI_CLFLUSH, SMI, !F, 0x3FF, B, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0 + }}, + .bits_count = 1 ), + CMD( 
MI_REPORT_PERF_COUNT, SMI, !F, 0x3F, B, + .bits = {{ + .offset = 1, + .mask = MI_REPORT_PERF_COUNT_GGTT, + .expected = 0 + }}, + .bits_count = 1 ), + CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, B, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0 + }}, + .bits_count = 1 ), CMD( GFX_OP_3DSTATE_VF_STATISTICS, S3D,F, 1, S ), CMD( PIPELINE_SELECT, S3D,F, 1, S ), CMD( MEDIA_VFE_STATE, S3D, !F,
[Intel-gfx] [PATCH 11/13] drm/i915: Reject commands that would store to global HWS page
From: Brad Volkin bradley.d.vol...@intel.com PIPE_CONTROL and MI_FLUSH_DW have bits that would write to the hardware status page. The driver stores request tracking info there, so don't let userspace overwrite it. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 30 ++ drivers/gpu/drm/i915/i915_reg.h| 1 + 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 26072a2..b93df1c 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -141,7 +141,8 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { }, { .offset = 1, - .mask = PIPE_CONTROL_GLOBAL_GTT_IVB, + .mask = (PIPE_CONTROL_GLOBAL_GTT_IVB | +PIPE_CONTROL_STORE_DATA_INDEX), .expected = 0, .condition_offset = 1, .condition_mask = PIPE_CONTROL_POST_SYNC_OP_MASK @@ -192,8 +193,15 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { .expected = 0, .condition_offset = 0, .condition_mask = MI_FLUSH_DW_OP_MASK + }, + { + .offset = 0, + .mask = MI_FLUSH_DW_STORE_INDEX, + .expected = 0, + .condition_offset = 0, + .condition_mask = MI_FLUSH_DW_OP_MASK }}, - .bits_count = 2 ), + .bits_count = 3 ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, B, .bits = {{ .offset = 0, @@ -231,8 +239,15 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { .expected = 0, .condition_offset = 0, .condition_mask = MI_FLUSH_DW_OP_MASK + }, + { + .offset = 0, + .mask = MI_FLUSH_DW_STORE_INDEX, + .expected = 0, + .condition_offset = 0, + .condition_mask = MI_FLUSH_DW_OP_MASK }}, - .bits_count = 2 ), + .bits_count = 3 ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, B, .bits = {{ .offset = 0, @@ -264,8 +279,15 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { .expected = 0, .condition_offset = 0, .condition_mask = MI_FLUSH_DW_OP_MASK + }, + { + .offset = 0, + .mask = MI_FLUSH_DW_STORE_INDEX, + .expected = 0, 
+ .condition_offset = 0, + .condition_mask = MI_FLUSH_DW_OP_MASK }}, - .bits_count = 2 ), + .bits_count = 3 ), CMD( COLOR_BLT,S2D, !F, 0x3F, S ), CMD( SRC_COPY_BLT, S2D, !F, 0x3F, S ), }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ff263f4..5f77cb6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -324,6 +324,7 @@ #define GFX_OP_PIPE_CONTROL(len) ((0x329)|(0x327)|(0x224)|(len-2)) #define PIPE_CONTROL_GLOBAL_GTT_IVB (124) /* gen7+ */ #define PIPE_CONTROL_MMIO_WRITE (123) +#define PIPE_CONTROL_STORE_DATA_INDEX(121) #define PIPE_CONTROL_CS_STALL(120) #define PIPE_CONTROL_TLB_INVALIDATE (118) #define PIPE_CONTROL_QW_WRITE(114) -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/6] tests: Add a test for the command parser
From: Brad Volkin bradley.d.vol...@intel.com Start with a simple testcase that should pass. v2: Switch to I915_PARAM_CMD_PARSER_VERSION Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/.gitignore | 1 + tests/Makefile.sources | 1 + tests/gem_exec_parse.c | 140 + 3 files changed, 142 insertions(+) create mode 100644 tests/gem_exec_parse.c diff --git a/tests/.gitignore b/tests/.gitignore index 7377275..f2356fb 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -35,6 +35,7 @@ gem_exec_blt gem_exec_faulting_reloc gem_exec_lut_handle gem_exec_nop +gem_exec_parse gem_fd_exhaustion gem_fenced_exec_thrash gem_fence_thrash diff --git a/tests/Makefile.sources b/tests/Makefile.sources index a8c0c96..90a5322 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -29,6 +29,7 @@ TESTS_progs_M = \ gem_exec_bad_domains \ gem_exec_faulting_reloc \ gem_exec_nop \ + gem_exec_parse \ gem_fenced_exec_thrash \ gem_fence_thrash \ gem_flink \ diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c new file mode 100644 index 000..c71e478 --- /dev/null +++ b/tests/gem_exec_parse.c @@ -0,0 +1,140 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include stdlib.h +#include stdint.h +#include stdio.h +#include drm.h +#include i915_drm.h +#include drmtest.h + +#ifndef I915_PARAM_CMD_PARSER_VERSION +#define I915_PARAM_CMD_PARSER_VERSION 28 +#endif + +static int exec_batch_patched(int fd, uint32_t cmd_bo, uint32_t *cmds, + int size, int patch_offset, uint64_t expected_value) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 objs[2]; + struct drm_i915_gem_relocation_entry reloc[1]; + + uint32_t target_bo = gem_create(fd, 4096); + uint64_t actual_value = 0; + + gem_write(fd, cmd_bo, 0, cmds, size); + + reloc[0].offset = patch_offset; + reloc[0].delta = 0; + reloc[0].target_handle = target_bo; + reloc[0].read_domains = I915_GEM_DOMAIN_RENDER; + reloc[0].write_domain = I915_GEM_DOMAIN_RENDER; + reloc[0].presumed_offset = 0; + + objs[0].handle = target_bo; + objs[0].relocation_count = 0; + objs[0].relocs_ptr = 0; + objs[0].alignment = 0; + objs[0].offset = 0; + objs[0].flags = 0; + objs[0].rsvd1 = 0; + objs[0].rsvd2 = 0; + + objs[1].handle = cmd_bo; + objs[1].relocation_count = 1; + objs[1].relocs_ptr = (uintptr_t)reloc; + objs[1].alignment = 0; + objs[1].offset = 0; + objs[1].flags = 0; + objs[1].rsvd1 = 0; + objs[1].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t)objs; + execbuf.buffer_count = 2; + execbuf.batch_start_offset = 0; + execbuf.batch_len = size; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.flags = I915_EXEC_RENDER; + 
i915_execbuffer2_set_context_id(execbuf, 0); + execbuf.rsvd2 = 0; + + gem_execbuf(fd, execbuf); + gem_sync(fd, cmd_bo); + + gem_read(fd,target_bo, 0, actual_value, sizeof(actual_value)); + igt_assert(expected_value == actual_value); + + gem_close(fd, target_bo); + + return 1; +} + +uint32_t handle; +int fd; + +#define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2) +#define PIPE_CONTROL_QW_WRITE(114) + +igt_main +{ + igt_fixture { + int parser_version = 0; +drm_i915_getparam_t gp; + int rc; + + fd = drm_open_any(); + + gp.param = I915_PARAM_CMD_PARSER_VERSION; + gp.value = parser_version; + rc = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
[Intel-gfx] [PATCH 5/6] tests/gem_exec_parse: Test for batches w/o MI_BATCH_BUFFER_END
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 9 + 1 file changed, 9 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index 9e90408..004c3bf 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -257,6 +257,15 @@ igt_main -EINVAL)); } + igt_subtest(batch-without-end) { + uint32_t noop[1024] = { 0 }; + igt_assert( + exec_batch(fd, handle, + noop, sizeof(noop), + I915_EXEC_RENDER, + -EINVAL)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 4/6] tests/gem_exec_parse: Add tests for bitmask checks
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index 48fde25..9e90408 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -145,6 +145,7 @@ int fd; #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2) #define PIPE_CONTROL_QW_WRITE(114) +#define PIPE_CONTROL_LRI_POST_OP (123) igt_main { @@ -239,6 +240,23 @@ igt_main 0)); } + igt_subtest(bitmasks) { + uint32_t pc[] = { + GFX_OP_PIPE_CONTROL, + (PIPE_CONTROL_QW_WRITE | +PIPE_CONTROL_LRI_POST_OP), + 0, // To be patched + 0x1200, + 0, + MI_BATCH_BUFFER_END, + }; + igt_assert( + exec_batch(fd, handle, + pc, sizeof(pc), + I915_EXEC_RENDER, + -EINVAL)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/6] tests/gem_exec_parse: Add tests for register whitelist
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 26 ++ 1 file changed, 26 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index ebf7116..48fde25 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -141,6 +141,7 @@ int fd; #define MI_ARB_ON_OFF (0x8 23) #define MI_DISPLAY_FLIP ((0x14 23) | 1) +#define MI_LOAD_REGISTER_IMM ((0x22 23) | 1) #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2) #define PIPE_CONTROL_QW_WRITE(114) @@ -213,6 +214,31 @@ igt_main -EINVAL)); } + igt_subtest(registers) { + uint32_t lri_bad[] = { + MI_LOAD_REGISTER_IMM, + 0, // disallowed register address + 0x1200, + MI_BATCH_BUFFER_END, + }; + uint32_t lri_ok[] = { + MI_LOAD_REGISTER_IMM, + 0x5280, // allowed register address (SO_WRITE_OFFSET[0]) + 0x1, + MI_BATCH_BUFFER_END, + }; + igt_assert( + exec_batch(fd, handle, + lri_bad, sizeof(lri_bad), + I915_EXEC_RENDER, + -EINVAL)); + igt_assert( + exec_batch(fd, handle, + lri_ok, sizeof(lri_ok), + I915_EXEC_RENDER, + 0)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] intel: Merge i915_drm.h with cmd parser define
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- include/drm/i915_drm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 2f4eb8c..ba863c4 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -27,7 +27,7 @@ #ifndef _I915_DRM_H_ #define _I915_DRM_H_ -#include drm.h +#include drm/drm.h /* Please note that modifications to all structs defined here are * subject to backwards-compatibility constraints. @@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_NO_RELOC25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 #define I915_PARAM_HAS_WT 27 +#define I915_PARAM_CMD_PARSER_VERSION 28 typedef struct drm_i915_getparam { int param; @@ -721,7 +722,7 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_IS_PINNED(110) -/** Provide a hint to the kernel that the command stream and auxilliary +/** Provide a hint to the kernel that the command stream and auxiliary * state buffers already holds the correct presumed addresses and so the * relocation process may be skipped if no buffers need to be moved in * preparation for the execbuffer. -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/6] tests/gem_exec_parse: Add tests for rejected commands
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 81 ++ 1 file changed, 81 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index c71e478..ebf7116 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -93,9 +93,55 @@ static int exec_batch_patched(int fd, uint32_t cmd_bo, uint32_t *cmds, return 1; } +static int exec_batch(int fd, uint32_t cmd_bo, uint32_t *cmds, + int size, int ring, int expected_ret) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 objs[1]; + int ret; + + gem_write(fd, cmd_bo, 0, cmds, size); + + objs[0].handle = cmd_bo; + objs[0].relocation_count = 0; + objs[0].relocs_ptr = 0; + objs[0].alignment = 0; + objs[0].offset = 0; + objs[0].flags = 0; + objs[0].rsvd1 = 0; + objs[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t)objs; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = 0; + execbuf.batch_len = size; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.flags = ring; + i915_execbuffer2_set_context_id(execbuf, 0); + execbuf.rsvd2 = 0; + + ret = drmIoctl(fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + execbuf); + if (ret == 0) + igt_assert(expected_ret == 0); + else + igt_assert(-errno == expected_ret); + + gem_sync(fd, cmd_bo); + + return 1; +} + uint32_t handle; int fd; +#define MI_ARB_ON_OFF (0x8 23) +#define MI_DISPLAY_FLIP ((0x14 23) | 1) + #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2) #define PIPE_CONTROL_QW_WRITE(114) @@ -132,6 +178,41 @@ igt_main 0x1200)); } + igt_subtest(basic-rejected) { + uint32_t arb_on_off[] = { + MI_ARB_ON_OFF, + MI_BATCH_BUFFER_END, + }; + uint32_t display_flip[] = { + MI_DISPLAY_FLIP, + 0, 0, 0, + MI_BATCH_BUFFER_END, + 0 + }; + igt_assert( + exec_batch(fd, handle, + arb_on_off, sizeof(arb_on_off), + I915_EXEC_RENDER, + -EINVAL)); + igt_assert( + exec_batch(fd, handle, + arb_on_off, sizeof(arb_on_off), 
+ I915_EXEC_BSD, + -EINVAL)); + if (gem_has_vebox(fd)) { + igt_assert( + exec_batch(fd, handle, + arb_on_off, sizeof(arb_on_off), + I915_EXEC_VEBOX, + -EINVAL)); + } + igt_assert( + exec_batch(fd, handle, + display_flip, sizeof(display_flip), + I915_EXEC_BLT, + -EINVAL)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 08/13] drm/i915: Enable register whitelist checks
From: Brad Volkin bradley.d.vol...@intel.com MI_STORE_REGISTER_MEM, MI_LOAD_REGISTER_MEM, and MI_LOAD_REGISTER_IMM commands allow userspace access to registers. Only certain registers should be allowed for such access, so enable checking for those commands. Each ring gets its own register whitelist. MI_LOAD_REGISTER_REG on HSW also allows register access but is currently unused by userspace components. Leave it rejected. PIPE_CONTROL and MEDIA_VFE_STATE allow register access based on certain bits being set. Reject those as well. OTC-Tracker: AXIA-4631 Change-Id: Ie614a2f0eb2e5917de809e5a17957175d24cc44f Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 23 --- drivers/gpu/drm/i915/i915_reg.h| 3 +++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 296e322..5d3e303 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -63,9 +63,12 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_SUSPEND_FLUSH, SMI,F, 1, S ), CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), - CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, R ), - CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, R ), - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, R ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007C } ), + CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007C } ), + CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007C } ), CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), }; @@ -82,9 +85,23 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, S ), CMD( GFX_OP_3DSTATE_VF_STATISTICS, S3D,F, 1, S ), CMD( PIPELINE_SELECT, S3D,F, 1, S ), + CMD( MEDIA_VFE_STATE, S3D, !F, 0x, B, + .bits = {{ + 
.offset = 2, + .mask = MEDIA_VFE_STATE_MMIO_ACCESS_MASK, + .expected = 0 + }}, + .bits_count = 1 ), CMD( GPGPU_OBJECT, S3D, !F, 0xFF, S ), CMD( GPGPU_WALKER, S3D, !F, 0xFF, S ), CMD( GFX_OP_3DSTATE_SO_DECL_LIST, S3D, !F, 0x1FF, S ), + CMD( GFX_OP_PIPE_CONTROL(5), S3D, !F, 0xFF, B, + .bits = {{ + .offset = 1, + .mask = PIPE_CONTROL_MMIO_WRITE, + .expected = 0 + }}, + .bits_count = 1 ), }; static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b99bacf..6592d0d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -319,6 +319,7 @@ #define DISPLAY_PLANE_B (120) #define GFX_OP_PIPE_CONTROL(len) ((0x329)|(0x327)|(0x224)|(len-2)) #define PIPE_CONTROL_GLOBAL_GTT_IVB (124) /* gen7+ */ +#define PIPE_CONTROL_MMIO_WRITE (123) #define PIPE_CONTROL_CS_STALL(120) #define PIPE_CONTROL_TLB_INVALIDATE (118) #define PIPE_CONTROL_QW_WRITE(114) @@ -359,6 +360,8 @@ #define PIPELINE_SELECT ((0x329)|(0x127)|(0x124)|(0x416)) #define GFX_OP_3DSTATE_VF_STATISTICS ((0x329)|(0x127)|(0x024)|(0xB16)) +#define MEDIA_VFE_STATE ((0x329)|(0x227)|(0x024)|(0x016)) +#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) #define GPGPU_OBJECT ((0x329)|(0x227)|(0x124)|(0x416)) #define GPGPU_WALKER ((0x329)|(0x227)|(0x124)|(0x516)) #define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 6/6] tests/gem_exec_parse: Test a command crossing a page boundary
From: Brad Volkin bradley.d.vol...@intel.com This is a speculative test in that it's not particularly relevant today, but is important if we switch the parser implementation to use kmap_atomic instead of vmap. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- tests/gem_exec_parse.c | 68 ++ 1 file changed, 68 insertions(+) diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c index 004c3bf..455bfbf 100644 --- a/tests/gem_exec_parse.c +++ b/tests/gem_exec_parse.c @@ -136,6 +136,60 @@ static int exec_batch(int fd, uint32_t cmd_bo, uint32_t *cmds, return 1; } +static int exec_split_batch(int fd, uint32_t *cmds, + int size, int ring, int expected_ret) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 objs[1]; + uint32_t cmd_bo; + uint32_t noop[1024] = { 0 }; + int ret; + + // Allocate and fill a 2-page batch with noops + cmd_bo = gem_create(fd, 4096 * 2); + gem_write(fd, cmd_bo, 0, noop, sizeof(noop)); + gem_write(fd, cmd_bo, 4096, noop, sizeof(noop)); + + // Write the provided commands such that the first dword + // of the command buffer is the last dword of the first + // page (i.e. the command is split across the two pages). 
+ gem_write(fd, cmd_bo, 4096-sizeof(uint32_t), cmds, size); + + objs[0].handle = cmd_bo; + objs[0].relocation_count = 0; + objs[0].relocs_ptr = 0; + objs[0].alignment = 0; + objs[0].offset = 0; + objs[0].flags = 0; + objs[0].rsvd1 = 0; + objs[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t)objs; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = 0; + execbuf.batch_len = size; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.flags = ring; + i915_execbuffer2_set_context_id(execbuf, 0); + execbuf.rsvd2 = 0; + + ret = drmIoctl(fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + execbuf); + if (ret == 0) + igt_assert(expected_ret == 0); + else + igt_assert(-errno == expected_ret); + + gem_sync(fd, cmd_bo); + gem_close(fd, cmd_bo); + + return 1; +} + uint32_t handle; int fd; @@ -266,6 +320,20 @@ igt_main -EINVAL)); } + igt_subtest(cmd-crossing-page) { + uint32_t lri_ok[] = { + MI_LOAD_REGISTER_IMM, + 0x5280, // allowed register address (SO_WRITE_OFFSET[0]) + 0x1, + MI_BATCH_BUFFER_END, + }; + igt_assert( + exec_split_batch(fd, + lri_ok, sizeof(lri_ok), + I915_EXEC_RENDER, + 0)); + } + igt_fixture { gem_close(fd, handle); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 01/22] drm/i915: Add data structures for command parser
From: Brad Volkin bradley.d.vol...@intel.com The command parser needs to know a few things about certain commands in order to process them correctly. Add structures for storing that information. OTC-Tracker: AXIA-4631 Change-Id: I50b98c71c6655893291c78a2d1b8954577b37a30 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 51 + 1 file changed, 51 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 14f250a..ff1e201 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1731,6 +1731,57 @@ struct drm_i915_file_private { atomic_t rps_wait_boost; }; +/** + * A command that requires special handling by the command parser. + */ +struct drm_i915_cmd_descriptor { + /** + * Flags describing how the command parser processes the command. + * + * CMD_DESC_FIXED: The command has a fixed length if this is set, + * a length mask if not set + * CMD_DESC_SKIP: The command is allowed but does not follow the + * standard length encoding for the opcode range in + * which it falls + */ + u32 flags; +#define CMD_DESC_FIXED (1<<0) +#define CMD_DESC_SKIP (1<<1) + + /** + * The command's unique identification bits and the bitmask to get them. + * This isn't strictly the opcode field as defined in the spec and may + * also include type, subtype, and/or subop fields. + */ + struct { + u32 value; + u32 mask; + } cmd; + + /** + * The command's length. The command is either fixed length (i.e. does + * not include a length field) or has a length field mask. The flag + * CMD_DESC_FIXED indicates a fixed length. Otherwise, the command has + * a length mask. All command entries in a command table must include + * length information. + */ + union { + u32 fixed; + u32 mask; + } length; +}; + +/** + * A table of commands requiring special handling by the command parser. + * + * Each ring has an array of tables.
Each table consists of an array of command + * descriptors, which must be sorted with command opcodes in ascending order. + */ +struct drm_i915_cmd_table { + const struct drm_i915_cmd_descriptor *table; + int count; +}; + #define INTEL_INFO(dev) (to_i915(dev)->info) #define IS_I830(dev) ((dev)->pdev->device == 0x3577) -- 1.8.4.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 04/22] drm/i915: Add per-ring command length decode functions
From: Brad Volkin bradley.d.vol...@intel.com For commands that aren't in the parser's tables, we get the length based on standard per-ring command encodings for specific opcode ranges. These functions just return the bitmask and the parser will extract the actual length value. OTC-Tracker: AXIA-4631 Change-Id: I2729d4483931cb4aea9403fd43710c4d4e8e5e89 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 62 + drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++ 2 files changed, 74 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 014e661..247d530 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -137,6 +137,62 @@ static const struct drm_i915_cmd_table gen7_blt_cmds[] = { { blt_cmds, ARRAY_SIZE(blt_cmds) }, }; +#define CLIENT_MASK 0xE0000000 +#define SUBCLIENT_MASK 0x18000000 +#define MI_CLIENT 0x00000000 +#define RC_CLIENT 0x60000000 +#define BC_CLIENT 0x40000000 +#define MEDIA_SUBCLIENT 0x10000000 + +static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = cmd_header & CLIENT_MASK; + u32 subclient = cmd_header & SUBCLIENT_MASK; + + if (client == MI_CLIENT) + return 0x3F; + else if (client == RC_CLIENT) { + if (subclient == MEDIA_SUBCLIENT) + return 0xFFFF; + else + return 0xFF; + } + + DRM_DEBUG_DRIVER("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header); + return 0; +} + +static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = cmd_header & CLIENT_MASK; + u32 subclient = cmd_header & SUBCLIENT_MASK; + + if (client == MI_CLIENT) + return 0x3F; + else if (client == RC_CLIENT) { + if (subclient == MEDIA_SUBCLIENT) + return 0xFFF; + else + return 0xFF; + } + + DRM_DEBUG_DRIVER("CMD: Abnormal bsd cmd length!
0x%08X\n", cmd_header); + return 0; +} + +static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = cmd_header & CLIENT_MASK; + + if (client == MI_CLIENT) + return 0x3F; + else if (client == BC_CLIENT) + return 0xFF; + + DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); + return 0; +} + void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) { if (!IS_GEN7(ring->dev)) @@ -152,18 +208,24 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) ring->cmd_tables = gen7_render_cmds; ring->cmd_table_count = ARRAY_SIZE(gen7_render_cmds); } + + ring->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VCS: ring->cmd_tables = gen7_video_cmds; ring->cmd_table_count = ARRAY_SIZE(gen7_video_cmds); + ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; case BCS: ring->cmd_tables = gen7_blt_cmds; ring->cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); + ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VECS: ring->cmd_tables = hsw_vebox_cmds; ring->cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); + /* VECS can use the same length_mask function as VCS */ + ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; default: DRM_DEBUG("CMD: cmd_parser_init with unknown ring: %d\n", diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 67305d3..8e71b59 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -169,6 +169,18 @@ struct intel_ring_buffer { */ const struct drm_i915_cmd_table *cmd_tables; int cmd_table_count; + + /** + * Returns the bitmask for the length field of the specified command. + * Return 0 for an unrecognized/invalid command. + * + * If the command parser finds an entry for a command in the ring's + * cmd_tables, it gets the command's length based on the table entry. + * If not, it calls this function to determine the per-ring length field + * encoding for the command (i.e.
certain opcode ranges use certain bits + * to encode the command length in the header). + */ + u32 (*get_cmd_length_mask)(u32 cmd_header); }; static inline bool -- 1.8.4.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 15/22] drm/i915: Reject commands that would store to global HWS page
From: Brad Volkin bradley.d.vol...@intel.com PIPE_CONTROL and MI_FLUSH_DW have bits that would write to the hardware status page. There are no users of this today and it seems unsafe. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 30 ++ drivers/gpu/drm/i915/i915_reg.h| 1 + 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 7b30a03..f32dc69 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -131,7 +131,8 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { }, { .offset = 1, - .mask = PIPE_CONTROL_GLOBAL_GTT_IVB, + .mask = (PIPE_CONTROL_GLOBAL_GTT_IVB | +PIPE_CONTROL_STORE_DATA_INDEX), .expected = 0, .condition_offset = 1, .condition_mask = PIPE_CONTROL_POST_SYNC_OP_MASK @@ -167,8 +168,15 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { .expected = 0, .condition_offset = 0, .condition_mask = MI_FLUSH_DW_OP_MASK + }, + { + .offset = 0, + .mask = MI_FLUSH_DW_STORE_INDEX, + .expected = 0, + .condition_offset = 0, + .condition_mask = MI_FLUSH_DW_OP_MASK }}, - .bits_count = 2 ), + .bits_count = 3 ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, B, .bits = {{ .offset = 0, @@ -192,8 +200,15 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { .expected = 0, .condition_offset = 0, .condition_mask = MI_FLUSH_DW_OP_MASK + }, + { + .offset = 0, + .mask = MI_FLUSH_DW_STORE_INDEX, + .expected = 0, + .condition_offset = 0, + .condition_mask = MI_FLUSH_DW_OP_MASK }}, - .bits_count = 2 ), + .bits_count = 3 ), CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, B, .bits = {{ .offset = 0, @@ -217,8 +232,15 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { .expected = 0, .condition_offset = 0, .condition_mask = MI_FLUSH_DW_OP_MASK + }, + { + .offset = 0, + .mask = MI_FLUSH_DW_STORE_INDEX, + .expected = 0, + .condition_offset = 0, + 
.condition_mask = MI_FLUSH_DW_OP_MASK }}, - .bits_count = 2 ), + .bits_count = 3 ), CMD( COLOR_BLT, S2D, !F, 0x3F, S ), CMD( SRC_COPY_BLT, S2D, !F, 0x3F, S ), }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3f64d41..919d1a6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -323,6 +323,7 @@ #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2)) #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ #define PIPE_CONTROL_MMIO_WRITE (1<<23) +#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) #define PIPE_CONTROL_QW_WRITE (1<<14) -- 1.8.4.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 18/22] drm/i915: Reject MI_ARB_ON_OFF on VECS
From: Brad Volkin bradley.d.vol...@intel.com Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index c8426af..5593740 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -197,6 +197,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { }; static const struct drm_i915_cmd_descriptor vecs_cmds[] = { + CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), CMD( MI_FLUSH_DW, SMI, !F, 0x3F, B, .bits = {{ .offset = 0, -- 1.8.4.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 06/22] drm/i915: Add a HAS_CMD_PARSER getparam
From: Brad Volkin bradley.d.vol...@intel.com So userspace can query the kernel for command parser support. OTC-Tracker: AXIA-4631 Change-Id: I58af650db9f6753c2dcac9c54ab432fd31db302f Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 5aeb103..f0a4638 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1003,6 +1003,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: value = 1; break; + case I915_PARAM_HAS_CMD_PARSER: + value = 1; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 52aed89..48cc277 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_NO_RELOC 25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 #define I915_PARAM_HAS_WT 27 +#define I915_PARAM_HAS_CMD_PARSER 28 typedef struct drm_i915_getparam { int param; -- 1.8.4.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 09/22] drm/i915: Add support for rejecting commands via bitmasks
From: Brad Volkin bradley.d.vol...@intel.com A variety of checks we want to do amount to verifying that a given bit or bits are set/clear in a given dword of a command. For now, allow a small but arbitrary number of bitmasks for each command. OTC-Tracker: AXIA-4631 Change-Id: Icc77316c243b6e218774c15e2c090cc470d59317 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 16 ++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 2dbca01..99d15f3 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -400,6 +400,28 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, } } + if (desc->flags & CMD_DESC_BITMASK) { + int i; + + for (i = 0; i < desc->bits_count; i++) { + u32 dword = cmd[desc->bits[i].offset] & + desc->bits[i].mask; + + if (dword != desc->bits[i].expected) { + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n", + *cmd, + desc->bits[i].mask, + desc->bits[i].expected, + dword, ring->id); + ret = -EINVAL; + break; + } + } + + if (ret) + break; + } + cmd += length; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 83b6031..f31fc68 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1752,6 +1752,7 @@ struct drm_i915_cmd_descriptor { #define CMD_DESC_SKIP (1<<1) #define CMD_DESC_REJECT (1<<2) #define CMD_DESC_REGISTER (1<<3) +#define CMD_DESC_BITMASK (1<<4) /** * The command's unique identification bits and the bitmask to get them. @@ -1784,6 +1785,21 @@ struct drm_i915_cmd_descriptor { u32 offset; u32 mask; } reg; + +#define MAX_CMD_DESC_BITMASKS 3 + /** + * Describes command checks where a particular dword is masked and + * compared against an expected value. If the command does not match + * the expected value, the parser rejects it.
Only valid if flags has +* the CMD_DESC_BITMASK bit set. +*/ + struct { + u32 offset; + u32 mask; + u32 expected; + } bits[MAX_CMD_DESC_BITMASKS]; + /** Number of valid entries in the bits array */ + int bits_count; }; /** -- 1.8.4.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 14/22] drm/i915: Enable PPGTT command parser checks
From: Brad Volkin bradley.d.vol...@intel.com Various commands that access memory have a bit to determine whether the graphics address specified in the command should use the GGTT or PPGTT for translation. These checks ensure that the bit indicates PPGTT translation. Most of these checks use the existing bit-checking infrastructure. The PIPE_CONTROL and MI_FLUSH_DW commands, however, are multi-function commands. The GGTT/PPGTT bit is only relevant for certain uses of the command. As such, this change also extends the bit-checking code to include a condition mask and offset. If the condition mask is non-zero then the parser only performs the bit check when the bits specified by the condition mask/offset are also non-zero. NOTE: At this point in the series PPGTT must be enabled for the parser to work correctly. If it's not enabled, userspace will not be setting the PPGTT bits the way the parser requires. There's a WARN_ON to detect this case. OTC-Tracker: AXIA-4631 Change-Id: I3f4c76b6734f1956ec47e698230f97d0998ff92b Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 110 ++--- drivers/gpu/drm/i915/i915_drv.h| 6 ++ drivers/gpu/drm/i915/i915_reg.h| 5 ++ 3 files changed, 111 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index b881d39..7b30a03 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -61,15 +61,33 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_REPORT_HEAD, SMI,F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI,F, 1, S ), CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, R ), - CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0 + }}, + .bits_count = 1 ), CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 
0x007C } ), CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, R ), - CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W, - .reg = { .offset = 1, .mask = 0x007C } ), - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W, - .reg = { .offset = 1, .mask = 0x007C } ), + CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W | B, + .reg = { .offset = 1, .mask = 0x007C }, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0 + }}, + .bits_count = 1 ), + CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B, + .reg = { .offset = 1, .mask = 0x007C }, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0 + }}, + .bits_count = 1 ), CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), }; @@ -79,7 +97,20 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_PREDICATE, SMI,F, 1, S ), CMD( MI_TOPOLOGY_FILTER, SMI,F, 1, S ), - CMD( MI_CLFLUSH, SMI, !F, 0x3FF, S ), + CMD( MI_CLFLUSH, SMI, !F, 0x3FF, B, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0 + }}, + .bits_count = 1 ), + CMD( MI_CONDITIONAL_BATCH_BUFFER_END, SMI, !F, 0xFF, B, + .bits = {{ + .offset = 0, + .mask = MI_GLOBAL_GTT, + .expected = 0 + }}, + .bits_count = 1 ), CMD( GFX_OP_3DSTATE_VF_STATISTICS, S3D,F, 1, S ), CMD( PIPELINE_SELECT, S3D,F, 1, S ), CMD( MEDIA_VFE_STATE, S3D, !F, 0x, B, @@ -97,8 +128,15 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = { .offset = 1,
[Intel-gfx] [RFC 07/22] drm/i915: Add support for rejecting commands during parsing
From: Brad Volkin bradley.d.vol...@intel.com Certain commands are always disallowed from userspace. This adds the ability for the command parser to detect such commands and reject batch buffers containing them. OTC-Tracker: AXIA-4631 Change-Id: I000b0df4d441ec80b607a50d35e83418cdfd38b3 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 6 ++++++ drivers/gpu/drm/i915/i915_drv.h | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index b01628e..c64f640 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -368,6 +368,12 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, break; } + if (desc->flags & CMD_DESC_REJECT) { + DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd); + ret = -EINVAL; + break; + } + cmd += length; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 81ef047..6ace856 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1743,10 +1743,12 @@ struct drm_i915_cmd_descriptor { * CMD_DESC_SKIP: The command is allowed but does not follow the * standard length encoding for the opcode range in * which it falls + * CMD_DESC_REJECT: The command is never allowed */ u32 flags; -#define CMD_DESC_FIXED (1<<0) -#define CMD_DESC_SKIP (1<<1) +#define CMD_DESC_FIXED (1<<0) +#define CMD_DESC_SKIP (1<<1) +#define CMD_DESC_REJECT (1<<2) /** * The command's unique identification bits and the bitmask to get them. -- 1.8.4.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 10/22] drm/i915: Reject unsafe commands
From: Brad Volkin bradley.d.vol...@intel.com These commands allow userspace to affect global state. OTC-Tracker: AXIA-4631 Change-Id: I80a22c9cd83181790d2a9064e70ea09326691b66 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 99d15f3..8ee4cda 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -47,6 +47,7 @@ #define SMFX STD_MFX_OPCODE_MASK #define F CMD_DESC_FIXED #define S CMD_DESC_SKIP +#define R CMD_DESC_REJECT /*Command Mask Fixed Len Action -- */ @@ -57,10 +58,11 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_ARB_CHECK, SMI,F, 1, S ), CMD( MI_REPORT_HEAD, SMI,F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI,F, 1, S ), - CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, S ), + CMD( MI_SEMAPHORE_MBOX,SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), - CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, S ), + CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, S ), + CMD( MI_UPDATE_GTT,SMI, !F, 0xFF, R ), CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, S ), CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, S ), CMD( MI_BATCH_BUFFER_START,SMI, !F, 0xFF, S ), @@ -68,8 +70,8 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { static const struct drm_i915_cmd_descriptor render_cmds[] = { CMD( MI_FLUSH, SMI,F, 1, S ), - CMD( MI_ARB_ON_OFF,SMI,F, 1, S ), - CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, S ), + CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), + CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_PREDICATE, SMI,F, 1, S ), CMD( MI_TOPOLOGY_FILTER, SMI,F, 1, S ), CMD( MI_CLFLUSH, SMI, !F, 0x3FF, S ), @@ -94,12 +96,12 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { }; static const struct drm_i915_cmd_descriptor video_cmds[] = { - CMD( MI_ARB_ON_OFF,SMI,F, 1, S ), + 
CMD( MI_ARB_ON_OFF,SMI,F, 1, R ), CMD( MFX_WAIT, SMFX, !F, 0x3F, S ), }; static const struct drm_i915_cmd_descriptor blt_cmds[] = { - CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, S ), + CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( COLOR_BLT,S2D, !F, 0x3F, S ), CMD( SRC_COPY_BLT, S2D, !F, 0x3F, S ), }; @@ -111,6 +113,7 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { #undef SMFX #undef F #undef S +#undef R static const struct drm_i915_cmd_table gen7_render_cmds[] = { { common_cmds, ARRAY_SIZE(common_cmds) }, -- 1.8.4.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 22/22] drm/i915: Enable command parsing by default
From: Brad Volkin bradley.d.vol...@intel.com OTC-Tracker: AXIA-4631 Change-Id: I6747457e1fe7494bd42787af51198fcba398ad78 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 90d7db0..8c0d91b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -154,10 +154,10 @@ module_param_named(prefault_disable, i915_prefault_disable, bool, 0600); MODULE_PARM_DESC(prefault_disable, "Disable page prefaulting for pread/pwrite/reloc (default:false). For developers only."); -int i915_enable_cmd_parser __read_mostly = 0; +int i915_enable_cmd_parser __read_mostly = 1; module_param_named(enable_cmd_parser, i915_enable_cmd_parser, int, 0600); MODULE_PARM_DESC(enable_cmd_parser, - "Enable command parsing (default: false)"); + "Enable command parsing (default: true)"); static struct drm_driver driver; -- 1.8.4.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx