[Intel-gfx] [PATCH v4 3/7] drm/i915: Add a batch pool debugfs file

2014-11-07 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

It provides some useful information about the buffers in
the global command parser batch pool.

v2: rebase on global pool instead of per-ring pools

v3: rebase

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 41 +
 1 file changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 319da61..efdd59a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -582,6 +582,46 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
return 0;
 }
 
+static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = m->private;
+   struct drm_device *dev = node->minor->dev;
+   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct drm_i915_gem_object *obj;
+   int count = 0;
+   int ret;
+
+   ret = mutex_lock_interruptible(&dev->struct_mutex);
+   if (ret)
+   return ret;
+
+   seq_puts(m, "active:\n");
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.active_list,
+   batch_pool_list) {
+   seq_puts(m, "   ");
+   describe_obj(m, obj);
+   seq_putc(m, '\n');
+   count++;
+   }
+
+   seq_puts(m, "inactive:\n");
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.inactive_list,
+   batch_pool_list) {
+   seq_puts(m, "   ");
+   describe_obj(m, obj);
+   seq_putc(m, '\n');
+   count++;
+   }
+
+   seq_printf(m, "total: %d\n", count);
+
+   mutex_unlock(&dev->struct_mutex);
+
+   return 0;
+}
+
 static int i915_gem_request_info(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m-private;
@@ -4262,6 +4302,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_gem_hws_blt", i915_hws_info, 0, (void *)BCS},
{"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS},
{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
+   {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
{"i915_frequency_info", i915_frequency_info, 0},
{"i915_drpc_info", i915_drpc_info, 0},
{"i915_emon_status", i915_emon_status, 0},
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4 4/7] drm/i915: Add batch pool details to i915_gem_objects debugfs

2014-11-07 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

To better account for the potentially large memory consumption
of the batch pool.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 45 +
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index efdd59a..60d5ceb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -360,6 +360,38 @@ static int per_file_stats(int id, void *ptr, void *data)
return 0;
 }
 
+#define print_file_stats(m, name, stats) \
+   seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu inactive, %zu 
global, %zu shared, %zu unbound)\n", \
+  name, \
+  stats.count, \
+  stats.total, \
+  stats.active, \
+  stats.inactive, \
+  stats.global, \
+  stats.shared, \
+  stats.unbound)
+
+static void print_batch_pool_stats(struct seq_file *m,
+  struct drm_i915_private *dev_priv)
+{
+   struct drm_i915_gem_object *obj;
+   struct file_stats stats;
+
+   memset(&stats, 0, sizeof(stats));
+
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.active_list,
+   batch_pool_list)
+   per_file_stats(0, obj, &stats);
+
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.inactive_list,
+   batch_pool_list)
+   per_file_stats(0, obj, &stats);
+
+   print_file_stats(m, "batch pool", stats);
+}
+
 #define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
size += i915_gem_obj_ggtt_size(vma-obj); \
@@ -442,6 +474,9 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
   dev_priv-gtt.mappable_end - dev_priv-gtt.base.start);
 
seq_putc(m, '\n');
+   print_batch_pool_stats(m, dev_priv);
+
+   seq_putc(m, '\n');
list_for_each_entry_reverse(file, dev-filelist, lhead) {
struct file_stats stats;
struct task_struct *task;
@@ -459,15 +494,7 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
 */
rcu_read_lock();
task = pid_task(file->pid, PIDTYPE_PID);
-   seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu 
inactive, %zu global, %zu shared, %zu unbound)\n",
-  task ? task->comm : "unknown",
-  stats.count,
-  stats.total,
-  stats.active,
-  stats.inactive,
-  stats.global,
-  stats.shared,
-  stats.unbound);
+   print_file_stats(m, task ? task->comm : "unknown", stats);
rcu_read_unlock();
}
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4 6/7] drm/i915: Mark shadow batch buffers as purgeable

2014-11-07 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

By adding a new exec_entry flag, we cleanly mark the shadow objects
as purgeable after they are on the active list.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 20835b8..a271bc0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,6 +37,7 @@
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
+#define  __EXEC_OBJECT_PURGEABLE (1<<27)
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -223,7 +224,12 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
if (entry->flags & __EXEC_OBJECT_HAS_PIN)
vma->pin_count--;
 
-   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
+   if (entry->flags & __EXEC_OBJECT_PURGEABLE)
+   obj->madv = I915_MADV_DONTNEED;
+
+   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
+ __EXEC_OBJECT_HAS_PIN |
+ __EXEC_OBJECT_PURGEABLE);
 }
 
 static void eb_destroy(struct eb_vmas *eb)
@@ -1373,6 +1379,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}
 
+   shadow_batch_obj->madv = I915_MADV_WILLNEED;
+
ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
if (ret)
goto err;
@@ -1396,6 +1404,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
vma->exec_entry = &shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
drm_gem_object_reference(&shadow_batch_obj->base);
list_add_tail(&vma->exec_list, &eb->vmas);
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4 0/7] Command parser batch buffer copy

2014-11-07 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This is v4 of the series I sent here:
http://lists.freedesktop.org/archives/intel-gfx/2014-November/054733.html

This version incorporates most of the feedback from v3. The couple of things
that I missed (mostly for timing reasons) are:
* Move 'pending_read_domains |= I915_GEM_DOMAIN_COMMAND' after the parser
* Maybe remove the memsets from the batch copy function
* Today's feedback from Chris and Daniel re: madv

I'd suggest that the first two could be small follow up patches, and the madv
changes I did based on Daniel's earlier comments were pulled into a separate
patch that could be rewritten or modified as needed.

Brad Volkin (7):
  drm/i915: Implement a framework for batch buffer pools
  drm/i915: Use batch pools with the command parser
  drm/i915: Add a batch pool debugfs file
  drm/i915: Add batch pool details to i915_gem_objects debugfs
  drm/i915: Use batch length instead of object size in command parser
  drm/i915: Mark shadow batch buffers as purgeable
  drm/i915: Tidy up execbuffer command parsing code

 Documentation/DocBook/drm.tmpl |   5 +
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_cmd_parser.c |  97 ++
 drivers/gpu/drm/i915/i915_debugfs.c|  86 ++--
 drivers/gpu/drm/i915/i915_dma.c|   1 +
 drivers/gpu/drm/i915/i915_drv.h|  24 +
 drivers/gpu/drm/i915/i915_gem.c|   3 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 152 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 105 
 9 files changed, 430 insertions(+), 44 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4 2/7] drm/i915: Use batch pools with the command parser

2014-11-07 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This patch sets up all of the tracking and copying necessary to
use batch pools with the command parser and dispatches the copied
(shadow) batch to the hardware.

After this patch, the parser is in 'enabling' mode.

Note that performance takes a hit from the copy in some cases
and will likely need some work. At a rough pass, the memcpy
appears to be the bottleneck. Without having done a deeper
analysis, two ideas that come to mind are:
1) Copy sections of the batch at a time, as they are reached
   by parsing. Might improve cache locality.
2) Copy only up to the userspace-supplied batch length and
   memset the rest of the buffer. Reduces the number of reads.

v2:
- Remove setting the capacity of the pool
- One global pool instead of per-ring pools
- Replace batch_obj with shadow_batch_obj and hook into eb->vmas
- Memset any space in the shadow batch beyond what gets copied
- Rebased on execlist prep refactoring

v3:
- Rebase on chained batch handling
- Squash in setting the secure dispatch flag
- Add a note about the interaction w/secure dispatch pinning
- Check for request->batch_obj == NULL in i915_gem_free_request

v4:
- Fix read domains for shadow_batch_obj
- Remove the set_to_gtt_domain call from i915_parse_cmds
- ggtt_pin/unpin in the parser block to simplify error handling
- Check USES_FULL_PPGTT before setting DISPATCH_SECURE flag
- Remove i915_gem_batch_pool_put calls

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 79 +++---
 drivers/gpu/drm/i915/i915_dma.c|  1 +
 drivers/gpu/drm/i915/i915_drv.h|  8 +++
 drivers/gpu/drm/i915/i915_gem.c|  2 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 49 --
 5 files changed, 117 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 809bb95..5a3f4e4 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -838,6 +838,56 @@ finish:
return (u32*)addr;
 }
 
+/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
+static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
+  struct drm_i915_gem_object *src_obj)
+{
+   int ret = 0;
+   int needs_clflush = 0;
+   u32 *src_addr, *dest_addr = NULL;
+
+   ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: failed to prep read\n");
+   return ERR_PTR(ret);
+   }
+
+   src_addr = vmap_batch(src_obj);
+   if (!src_addr) {
+   DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
+   ret = -ENOMEM;
+   goto unpin_src;
+   }
+
+   if (needs_clflush)
+   drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
+
+   ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: Failed to set batch CPU domain\n");
+   goto unmap_src;
+   }
+
+   dest_addr = vmap_batch(dest_obj);
+   if (!dest_addr) {
+   DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
+   ret = -ENOMEM;
+   goto unmap_src;
+   }
+
+   memcpy(dest_addr, src_addr, src_obj->base.size);
+   if (dest_obj->base.size > src_obj->base.size)
+   memset((u8 *)dest_addr + src_obj->base.size, 0,
+  dest_obj->base.size - src_obj->base.size);
+
+unmap_src:
+   vunmap(src_addr);
+unpin_src:
+   i915_gem_object_unpin_pages(src_obj);
+
+   return ret ? ERR_PTR(ret) : dest_addr;
+}
+
 /**
  * i915_needs_cmd_parser() - should a given ring use software command parsing?
  * @ring: the ring in question
@@ -954,6 +1004,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
  * @ring: the ring on which the batch is to execute
  * @batch_obj: the batch buffer in question
+ * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
  * @is_master: is the submitting process the drm master?
  *
@@ -965,32 +1016,28 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  */
 int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
+   struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
bool is_master)
 {
int ret = 0;
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
-   int needs_clflush = 0;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   ret = i915_gem_obj_prepare_shmem_read(batch_obj, needs_clflush);
-   if (ret) {
- 

[Intel-gfx] [PATCH v4 1/7] drm/i915: Implement a framework for batch buffer pools

2014-11-07 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described
in the kerneldoc in the patch. The code is simple, but separating
it out makes it easier to change the underlying algorithms and to
extend to future use cases should they arise.

The interface is simple: init to create an empty pool, fini to
clean it up, get to obtain a new buffer. Note that all buffers are
expected to be inactive before cleaning up the pool.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool
for the command parser.

v2:
- s/BUG_ON/WARN_ON/ for locking assertions
- Remove the cap on pool size
- Switch from alloc/free to init/fini

v3:
- Idiomatic looping structure in _fini
- Correct handling of purged objects
- Don't return a buffer that's too much larger than needed

v4:
- Rebased to latest -nightly

v5:
- Remove _put() function and clean up comments to match

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 Documentation/DocBook/drm.tmpl |   5 +
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_drv.h|  15 +++
 drivers/gpu/drm/i915/i915_gem.c|   1 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 152 +
 5 files changed, 174 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 7277a7f..29bc8f5 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -3989,6 +3989,11 @@ int num_ioctls;</synopsis>
 !Idrivers/gpu/drm/i915/i915_cmd_parser.c
   </sect2>
   <sect2>
+<title>Batchbuffer Pools</title>
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  </sect2>
+  <sect2>
 <title>Logical Rings, Logical Ring Contexts and Execlists</title>
 !Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and 
Execlists
 !Idrivers/gpu/drm/i915/intel_lrc.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 891e584..73cd2d7 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8fb8eba..2955ed9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1134,6 +1134,12 @@ struct intel_l3_parity {
int which_slice;
 };
 
+struct i915_gem_batch_pool {
+   struct drm_device *dev;
+   struct list_head active_list;
+   struct list_head inactive_list;
+};
+
 struct i915_gem_mm {
/** Memory allocator for GTT stolen memory */
struct drm_mm stolen;
@@ -1865,6 +1871,8 @@ struct drm_i915_gem_object {
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
 
+   struct list_head batch_pool_list;
+
/**
 * This is set if the object is on the active lists (has pending
 * rendering and so a non-zero seqno), and is not set if it i s on
@@ -2829,6 +2837,13 @@ void i915_destroy_error_state(struct drm_device *dev);
 void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
+/* i915_gem_batch_pool.c */
+void i915_gem_batch_pool_init(struct drm_device *dev,
+ struct i915_gem_batch_pool *pool);
+void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
+struct drm_i915_gem_object*
+i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
+
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(void);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3e0cabe..875c151 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4352,6 +4352,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->ring_list);
INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
+   INIT_LIST_HEAD(&obj->batch_pool_list);
 
obj-ops = ops;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
new file mode 100644
index 000..a55e43b
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the 

[Intel-gfx] [PATCH v4 5/7] drm/i915: Use batch length instead of object size in command parser

2014-11-07 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Previously we couldn't trust the user-supplied batch length because
it came directly from userspace (i.e. untrusted code). It would have
affected what commands software parsed without regard to what hardware
would actually execute, leaving a potential hole.

With the parser now copying the user supplied batch buffer and writing
MI_NOP commands to any space after the copied region, we can safely use
the batch length input. This should be a performance win as the actual
batch length is frequently much smaller than the allocated object size.

v2: Fix handling of non-zero batch_start_offset

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 48 --
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  1 +
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 5a3f4e4..30b3163 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -840,11 +840,19 @@ finish:
 
 /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
 static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
-  struct drm_i915_gem_object *src_obj)
+  struct drm_i915_gem_object *src_obj,
+  u32 batch_start_offset,
+  u32 batch_len)
 {
int ret = 0;
int needs_clflush = 0;
-   u32 *src_addr, *dest_addr = NULL;
+   u32 *src_base, *dest_base = NULL;
+   u32 *src_addr, *dest_addr;
+   u32 offset = batch_start_offset / sizeof(*dest_addr);
+   u32 end = batch_start_offset + batch_len;
+
+   if (end > dest_obj->base.size || end > src_obj->base.size)
+   return ERR_PTR(-E2BIG);
 
ret = i915_gem_obj_prepare_shmem_read(src_obj, needs_clflush);
if (ret) {
@@ -852,15 +860,17 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
return ERR_PTR(ret);
}
 
-   src_addr = vmap_batch(src_obj);
-   if (!src_addr) {
+   src_base = vmap_batch(src_obj);
+   if (!src_base) {
DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n);
ret = -ENOMEM;
goto unpin_src;
}
 
+   src_addr = src_base + offset;
+
if (needs_clflush)
-   drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
+   drm_clflush_virt_range((char *)src_addr, batch_len);
 
ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
if (ret) {
@@ -868,24 +878,27 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
goto unmap_src;
}
 
-   dest_addr = vmap_batch(dest_obj);
-   if (!dest_addr) {
+   dest_base = vmap_batch(dest_obj);
+   if (!dest_base) {
DRM_DEBUG_DRIVER(CMD: Failed to vmap shadow batch\n);
ret = -ENOMEM;
goto unmap_src;
}
 
-   memcpy(dest_addr, src_addr, src_obj->base.size);
-   if (dest_obj->base.size > src_obj->base.size)
-   memset((u8 *)dest_addr + src_obj->base.size, 0,
-  dest_obj->base.size - src_obj->base.size);
+   dest_addr = dest_base + offset;
+
+   if (batch_start_offset != 0)
+   memset((u8 *)dest_base, 0, batch_start_offset);
+
+   memcpy(dest_addr, src_addr, batch_len);
+   memset((u8 *)dest_addr + batch_len, 0, dest_obj->base.size - end);
 
 unmap_src:
-   vunmap(src_addr);
+   vunmap(src_base);
 unpin_src:
i915_gem_object_unpin_pages(src_obj);
 
-   return ret ? ERR_PTR(ret) : dest_addr;
+   return ret ? ERR_PTR(ret) : dest_base;
 }
 
 /**
@@ -1006,6 +1019,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * @batch_obj: the batch buffer in question
  * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
+ * @batch_len: length of the commands in batch_obj
  * @is_master: is the submitting process the drm master?
  *
  * Parses the specified batch buffer looking for privilege violations as
@@ -1018,6 +1032,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
+   u32 batch_len,
bool is_master)
 {
int ret = 0;
@@ -1025,7 +1040,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   batch_base = copy_batch(shadow_batch_obj, batch_obj);
+   batch_base = copy_batch(shadow_batch_obj, batch_obj,
+   

[Intel-gfx] [PATCH v4 7/7] drm/i915: Tidy up execbuffer command parsing code

2014-11-07 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Move it to a separate function since the main do_execbuffer function
already has so much going on.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 136 +
 1 file changed, 79 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a271bc0..58f0a6c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1026,6 +1026,75 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
 }
 
+static struct drm_i915_gem_object*
+i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
+ struct drm_i915_gem_exec_object2 *shadow_exec_entry,
+ struct eb_vmas *eb,
+ struct drm_i915_gem_object *batch_obj,
+ u32 batch_start_offset,
+ u32 batch_len,
+ bool is_master,
+ u32 *flags)
+{
+   struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
+   struct drm_i915_gem_object *shadow_batch_obj;
+   int ret;
+
+   shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
+  batch_obj->base.size);
+   if (IS_ERR(shadow_batch_obj))
+   return shadow_batch_obj;
+
+   shadow_batch_obj->madv = I915_MADV_WILLNEED;
+
+   ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
+   if (ret)
+   goto err;
+
+   ret = i915_parse_cmds(ring,
+ batch_obj,
+ shadow_batch_obj,
+ batch_start_offset,
+ batch_len,
+ is_master);
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);
+
+   if (ret) {
+   if (ret == -EACCES)
+   return batch_obj;
+   } else {
+   struct i915_vma *vma;
+
+   memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
+
+   vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
+   vma->exec_entry = shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
+   drm_gem_object_reference(&shadow_batch_obj->base);
+   list_add_tail(&vma->exec_list, &eb->vmas);
+
+   shadow_batch_obj->base.pending_read_domains =
+   batch_obj->base.pending_read_domains;
+
+   /*
+* Set the DISPATCH_SECURE bit to remove the NON_SECURE
+* bit from MI_BATCH_BUFFER_START commands issued in the
+* dispatch_execbuffer implementations. We specifically
+* don't want that set when the command parser is
+* enabled.
+*
+* FIXME: with aliasing ppgtt, buffers that should only
+* be in ggtt still end up in the aliasing ppgtt. remove
+* this check when that is fixed.
+*/
+   if (USES_FULL_PPGTT(dev))
+   *flags |= I915_DISPATCH_SECURE;
+   }
+
+err:
+   return ret ? ERR_PTR(ret) : shadow_batch_obj;
+}
+
 int
 i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
   struct intel_engine_cs *ring,
@@ -1242,7 +1311,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_i915_private *dev_priv = dev-dev_private;
struct eb_vmas *eb;
struct drm_i915_gem_object *batch_obj;
-   struct drm_i915_gem_object *shadow_batch_obj = NULL;
struct drm_i915_gem_exec_object2 shadow_exec_entry;
struct intel_engine_cs *ring;
struct intel_context *ctx;
@@ -1369,63 +1437,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
batch_obj-base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
if (i915_needs_cmd_parser(ring)) {
-   shadow_batch_obj =
-   i915_gem_batch_pool_get(dev_priv-mm.batch_pool,
-   batch_obj-base.size);
-   if (IS_ERR(shadow_batch_obj)) {
-   ret = PTR_ERR(shadow_batch_obj);
-   /* Don't try to clean up the obj in the error path */
-   shadow_batch_obj = NULL;
-   goto err;
-   }
-
-   shadow_batch_obj-madv = I915_MADV_WILLNEED;
-
-   ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
-   if (ret)
+   batch_obj = i915_gem_execbuffer_parse(ring,
+ shadow_exec_entry,
+ eb,
+ 

[Intel-gfx] [PATCH] tests/drv_hangman: skip a few asserts when using the cmd parser

2014-11-04 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This test has a few checks that batch buffer addresses in the error
state match the expected address for the userspace supplied batch.
But the batch buffer copy piece of the command parser means that
the logged addresses are actually _supposed_ to be different. So
skip just those checks.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/drv_hangman.c | 43 +--
 1 file changed, 37 insertions(+), 6 deletions(-)

I'm not sure it's actually worth doing any of the work beyond the
'if (bb_matched == 1)' block in check_error_state when the command
parser is enabled. Here I've kept that work and just taken out the
one check that would fail. But it seems like the work that's left
is only there to enable the removed check, so maybe not needed.
I'll defer to those that know more about this test.

diff --git a/tests/drv_hangman.c b/tests/drv_hangman.c
index 3d6b98b..8fbc2d3 100644
--- a/tests/drv_hangman.c
+++ b/tests/drv_hangman.c
@@ -36,6 +36,10 @@
 #include "igt_debugfs.h"
 #include "ioctl_wrappers.h"
 
+#ifndef I915_PARAM_CMD_PARSER_VERSION
+#define I915_PARAM_CMD_PARSER_VERSION   28
+#endif
+
 static int _read_sysfs(void *dst, int maxlen,
  const char* path,
  const char *fname)
@@ -262,6 +266,7 @@ static void test_error_state_basic(void)
 }
 
 static void check_error_state(const int gen,
+ const bool uses_cmd_parser,
  const char *expected_ring_name,
  uint64_t expected_offset)
 {
@@ -300,7 +305,8 @@ static void check_error_state(const int gen,
char expected_line[32];
 
igt_assert(strstr(ring_name, expected_ring_name));
-   igt_assert(gtt_offset == expected_offset);
+   if (!uses_cmd_parser)
+   igt_assert(gtt_offset == expected_offset);
 
for (i = 0; i < sizeof(batch) / 4; i++) {
igt_assert(getline(&line, &line_size, file) > 
0);
@@ -352,10 +358,12 @@ static void check_error_state(const int gen,
i++;
}
}
-   if (gen >= 4)
-   igt_assert(expected_addr == expected_offset);
-   else
-   igt_assert((expected_addr & ~0x1) == 
expected_offset);
+   if (!uses_cmd_parser) {
+   if (gen >= 4)
+   igt_assert(expected_addr == 
expected_offset);
+   else
+   igt_assert((expected_addr & ~0x1) == 
expected_offset);
+   }
ringbuf_ok = true;
continue;
}
@@ -370,22 +378,45 @@ static void check_error_state(const int gen,
close(debug_fd);
 }
 
+static bool uses_cmd_parser(int fd, int gen)
+{
+   int parser_version = 0;
+   drm_i915_getparam_t gp;
+   int rc;
+
+   gp.param = I915_PARAM_CMD_PARSER_VERSION;
+   gp.value = &parser_version;
+   rc = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+   if (rc || parser_version == 0)
+   return false;
+
+   if (!gem_uses_aliasing_ppgtt(fd))
+   return false;
+
+   if (gen != 7)
+   return false;
+
+   return true;
+}
+
 static void test_error_state_capture(unsigned ring_id,
 const char *ring_name)
 {
int fd, gen;
uint64_t offset;
+   bool cmd_parser;
 
check_other_clients();
clear_error_state();
 
fd = drm_open_any();
gen = intel_gen(intel_get_drm_devid(fd));
+   cmd_parser = uses_cmd_parser(fd, gen);
 
offset = submit_batch(fd, ring_id, true);
close(fd);
 
-   check_error_state(gen, ring_name, offset);
+   check_error_state(gen, cmd_parser, ring_name, offset);
 }
 
 static const struct target_ring {
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 5/5] drm/i915: Use batch length instead of object size in command parser

2014-11-03 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Previously we couldn't trust the user-supplied batch length because
it came directly from userspace (i.e. untrusted code). It would have
affected what commands software parsed without regard to what hardware
would actually execute, leaving a potential hole.

With the parser now copying the user supplied batch buffer and writing
MI_NOP commands to any space after the copied region, we can safely use
the batch length input. This should be a performance win as the actual
batch length is frequently much smaller than the allocated object size.

v2: Fix handling of non-zero batch_start_offset

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 48 --
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  1 +
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index c8fe403..d4d13b1 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -840,11 +840,19 @@ finish:
 
 /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
 static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
-  struct drm_i915_gem_object *src_obj)
+  struct drm_i915_gem_object *src_obj,
+  u32 batch_start_offset,
+  u32 batch_len)
 {
int ret = 0;
int needs_clflush = 0;
-   u32 *src_addr, *dest_addr = NULL;
+   u32 *src_base, *dest_base = NULL;
+   u32 *src_addr, *dest_addr;
+   u32 offset = batch_start_offset / sizeof(*dest_addr);
+   u32 end = batch_start_offset + batch_len;
+
+   if (end > dest_obj->base.size || end > src_obj->base.size)
+   return ERR_PTR(-E2BIG);
 
ret = i915_gem_obj_prepare_shmem_read(src_obj, needs_clflush);
if (ret) {
@@ -852,15 +860,17 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
return ERR_PTR(ret);
}
 
-   src_addr = vmap_batch(src_obj);
-   if (!src_addr) {
+   src_base = vmap_batch(src_obj);
+   if (!src_base) {
DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n);
ret = -ENOMEM;
goto unpin_src;
}
 
+   src_addr = src_base + offset;
+
if (needs_clflush)
-   drm_clflush_virt_range((char *)src_addr, src_obj-base.size);
+   drm_clflush_virt_range((char *)src_addr, batch_len);
 
ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
if (ret) {
@@ -868,24 +878,27 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
goto unmap_src;
}
 
-   dest_addr = vmap_batch(dest_obj);
-   if (!dest_addr) {
+   dest_base = vmap_batch(dest_obj);
+   if (!dest_base) {
DRM_DEBUG_DRIVER(CMD: Failed to vmap shadow batch\n);
ret = -ENOMEM;
goto unmap_src;
}
 
-   memcpy(dest_addr, src_addr, src_obj-base.size);
-   if (dest_obj-base.size  src_obj-base.size)
-   memset((u8 *)dest_addr + src_obj-base.size, 0,
-  dest_obj-base.size - src_obj-base.size);
+   dest_addr = dest_base + offset;
+
+   if (batch_start_offset != 0)
+   memset((u8 *)dest_base, 0, batch_start_offset);
+
+   memcpy(dest_addr, src_addr, batch_len);
+   memset((u8 *)dest_addr + batch_len, 0, dest_obj-base.size - end);
 
 unmap_src:
-   vunmap(src_addr);
+   vunmap(src_base);
 unpin_src:
i915_gem_object_unpin_pages(src_obj);
 
-   return ret ? ERR_PTR(ret) : dest_addr;
+   return ret ? ERR_PTR(ret) : dest_base;
 }
 
 /**
@@ -1006,6 +1019,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * @batch_obj: the batch buffer in question
  * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
+ * @batch_len: length of the commands in batch_obj
  * @is_master: is the submitting process the drm master?
  *
  * Parses the specified batch buffer looking for privilege violations as
@@ -1018,6 +1032,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
+   u32 batch_len,
bool is_master)
 {
int ret = 0;
@@ -1025,7 +1040,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   batch_base = copy_batch(shadow_batch_obj, batch_obj);
+   batch_base = copy_batch(shadow_batch_obj, batch_obj,
+   

[Intel-gfx] [PATCH v3 3/5] drm/i915: Add a batch pool debugfs file

2014-11-03 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

It provides some useful information about the buffers in
the global command parser batch pool.

v2: rebase on global pool instead of per-ring pools

v3: rebase

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 41 +
 1 file changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index a79f83c..5f7cbed 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -582,6 +582,46 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
return 0;
 }
 
+static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = m-private;
+   struct drm_device *dev = node-minor-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_i915_gem_object *obj;
+   int count = 0;
+   int ret;
+
+   ret = mutex_lock_interruptible(dev-struct_mutex);
+   if (ret)
+   return ret;
+
+   seq_puts(m, active:\n);
+   list_for_each_entry(obj,
+   dev_priv-mm.batch_pool.active_list,
+   batch_pool_list) {
+   seq_puts(m,);
+   describe_obj(m, obj);
+   seq_putc(m, '\n');
+   count++;
+   }
+
+   seq_puts(m, inactive:\n);
+   list_for_each_entry(obj,
+   dev_priv-mm.batch_pool.inactive_list,
+   batch_pool_list) {
+   seq_puts(m,);
+   describe_obj(m, obj);
+   seq_putc(m, '\n');
+   count++;
+   }
+
+   seq_printf(m, total: %d\n, count);
+
+   mutex_unlock(dev-struct_mutex);
+
+   return 0;
+}
+
 static int i915_gem_request_info(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m-private;
@@ -4177,6 +4217,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{i915_gem_hws_blt, i915_hws_info, 0, (void *)BCS},
{i915_gem_hws_bsd, i915_hws_info, 0, (void *)VCS},
{i915_gem_hws_vebox, i915_hws_info, 0, (void *)VECS},
+   {i915_gem_batch_pool, i915_gem_batch_pool_info, 0},
{i915_frequency_info, i915_frequency_info, 0},
{i915_drpc_info, i915_drpc_info, 0},
{i915_emon_status, i915_emon_status, 0},
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 4/5] drm/i915: Add batch pool details to i915_gem_objects debugfs

2014-11-03 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Add batch pool statistics to the i915_gem_objects debugfs output to
better account for the potentially large memory consumption of the
batch pool.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 45 +
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 5f7cbed..53f78da 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -360,6 +360,38 @@ static int per_file_stats(int id, void *ptr, void *data)
return 0;
 }
 
+#define print_file_stats(m, name, stats) \
+   seq_printf(m, %s: %u objects, %zu bytes (%zu active, %zu inactive, %zu 
global, %zu shared, %zu unbound)\n, \
+  name, \
+  stats.count, \
+  stats.total, \
+  stats.active, \
+  stats.inactive, \
+  stats.global, \
+  stats.shared, \
+  stats.unbound)
+
+static void print_batch_pool_stats(struct seq_file *m,
+  struct drm_i915_private *dev_priv)
+{
+   struct drm_i915_gem_object *obj;
+   struct file_stats stats;
+
+   memset(stats, 0, sizeof(stats));
+
+   list_for_each_entry(obj,
+   dev_priv-mm.batch_pool.active_list,
+   batch_pool_list)
+   per_file_stats(0, obj, stats);
+
+   list_for_each_entry(obj,
+   dev_priv-mm.batch_pool.inactive_list,
+   batch_pool_list)
+   per_file_stats(0, obj, stats);
+
+   print_file_stats(m, batch pool, stats);
+}
+
 #define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
size += i915_gem_obj_ggtt_size(vma-obj); \
@@ -442,6 +474,9 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
   dev_priv-gtt.mappable_end - dev_priv-gtt.base.start);
 
seq_putc(m, '\n');
+   print_batch_pool_stats(m, dev_priv);
+
+   seq_putc(m, '\n');
list_for_each_entry_reverse(file, dev-filelist, lhead) {
struct file_stats stats;
struct task_struct *task;
@@ -459,15 +494,7 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
 */
rcu_read_lock();
task = pid_task(file-pid, PIDTYPE_PID);
-   seq_printf(m, %s: %u objects, %zu bytes (%zu active, %zu 
inactive, %zu global, %zu shared, %zu unbound)\n,
-  task ? task-comm : unknown,
-  stats.count,
-  stats.total,
-  stats.active,
-  stats.inactive,
-  stats.global,
-  stats.shared,
-  stats.unbound);
+   print_file_stats(m, task ? task-comm : unknown, stats);
rcu_read_unlock();
}
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 0/5] Command parser batch buffer copy

2014-11-03 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This is v3 of the series I sent here:
http://lists.freedesktop.org/archives/intel-gfx/2014-July/048705.html

Most of the previous commentary still applies. We've fixed the secure
dispatch regression though, so the series now puts the parser into
enabling mode in patch 2.

There are currently some regressions. I've sent i-g-t patches for a couple that
are test issues. The remaining issues are:

drv_hangman error-state-capture-*
The test checks that the logged 'gtt_offset' matches the expected
offset of the userspace-supplied batch buffer. Similarly for the address
in an MI_BATCH_BUFFER_START command found in the logged ringbuffer contents.
These obviously won't match if the buffer submitted to hardware is from the
batch pool instead of the one from userspace.

gem_reloc_vs_gpu *-thrash-inactive
gem_persistent_relocs *-thrash-inactive
These fail with this type of error:

Test assertion failure function do_test, file gem_reloc_vs_gpu.c:221:
Failed assertion: test == 0xdeadbeef
mismatch in buffer 0: 0x instead of 0xdeadbeef
child 6 failed with exit status 99
Subtest forked-thrash-inactive: FAIL (3.824s)

One crashed, apparently in i915_gem_object_move_to_inactive() called via
i915_gem_reset(). I assume there's an issue with my active tracking or
madv usage for batch pool objects. Any input would be helpful.

gem_cs_tlb
This test takes longer and may time out.

Brad Volkin (5):
  drm/i915: Implement a framework for batch buffer pools
  drm/i915: Use batch pools with the command parser
  drm/i915: Add a batch pool debugfs file
  drm/i915: Add batch pool details to i915_gem_objects debugfs
  drm/i915: Use batch length instead of object size in command parser

 Documentation/DocBook/drm.tmpl |   5 +
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_cmd_parser.c | 102 +++
 drivers/gpu/drm/i915/i915_debugfs.c|  86 ++--
 drivers/gpu/drm/i915/i915_dma.c|   1 +
 drivers/gpu/drm/i915/i915_drv.h|  26 +
 drivers/gpu/drm/i915/i915_gem.c|  11 +++
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 153 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  62 +++-
 9 files changed, 416 insertions(+), 31 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 2/5] drm/i915: Use batch pools with the command parser

2014-11-03 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This patch sets up all of the tracking and copying necessary to
use batch pools with the command parser and dispatches the copied
(shadow) batch to the hardware.

After this patch, the parser is in 'enabling' mode.

Note that performance takes a hit from the copy in some cases
and will likely need some work. At a rough pass, the memcpy
appears to be the bottleneck. Without having done a deeper
analysis, two ideas that come to mind are:
1) Copy sections of the batch at a time, as they are reached
   by parsing. Might improve cache locality.
2) Copy only up to the userspace-supplied batch length and
   memset the rest of the buffer. Reduces the number of reads.

v2:
- Remove setting the capacity of the pool
- One global pool instead of per-ring pools
- Replace batch_obj with shadow_batch_obj and hook into eb->vmas
- Memset any space in the shadow batch beyond what gets copied
- Rebased on execlist prep refactoring

v3:
- Rebase on chained batch handling
- Squash in setting the secure dispatch flag
- Add a note about the interaction w/secure dispatch pinning
- Check for request->batch_obj == NULL in i915_gem_free_request

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 84 --
 drivers/gpu/drm/i915/i915_dma.c|  1 +
 drivers/gpu/drm/i915/i915_drv.h|  8 +++
 drivers/gpu/drm/i915/i915_gem.c| 10 
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 61 --
 5 files changed, 143 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 809bb95..c8fe403 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -838,6 +838,56 @@ finish:
return (u32*)addr;
 }
 
+/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
+static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
+  struct drm_i915_gem_object *src_obj)
+{
+   int ret = 0;
+   int needs_clflush = 0;
+   u32 *src_addr, *dest_addr = NULL;
+
+   ret = i915_gem_obj_prepare_shmem_read(src_obj, needs_clflush);
+   if (ret) {
+   DRM_DEBUG_DRIVER(CMD: failed to prep read\n);
+   return ERR_PTR(ret);
+   }
+
+   src_addr = vmap_batch(src_obj);
+   if (!src_addr) {
+   DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n);
+   ret = -ENOMEM;
+   goto unpin_src;
+   }
+
+   if (needs_clflush)
+   drm_clflush_virt_range((char *)src_addr, src_obj-base.size);
+
+   ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
+   if (ret) {
+   DRM_DEBUG_DRIVER(CMD: Failed to set batch CPU domain\n);
+   goto unmap_src;
+   }
+
+   dest_addr = vmap_batch(dest_obj);
+   if (!dest_addr) {
+   DRM_DEBUG_DRIVER(CMD: Failed to vmap shadow batch\n);
+   ret = -ENOMEM;
+   goto unmap_src;
+   }
+
+   memcpy(dest_addr, src_addr, src_obj-base.size);
+   if (dest_obj-base.size  src_obj-base.size)
+   memset((u8 *)dest_addr + src_obj-base.size, 0,
+  dest_obj-base.size - src_obj-base.size);
+
+unmap_src:
+   vunmap(src_addr);
+unpin_src:
+   i915_gem_object_unpin_pages(src_obj);
+
+   return ret ? ERR_PTR(ret) : dest_addr;
+}
+
 /**
  * i915_needs_cmd_parser() - should a given ring use software command parsing?
  * @ring: the ring in question
@@ -954,6 +1004,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
  * @ring: the ring on which the batch is to execute
  * @batch_obj: the batch buffer in question
+ * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
  * @is_master: is the submitting process the drm master?
  *
@@ -965,32 +1016,28 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  */
 int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
+   struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
bool is_master)
 {
int ret = 0;
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
-   int needs_clflush = 0;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   ret = i915_gem_obj_prepare_shmem_read(batch_obj, needs_clflush);
-   if (ret) {
-   DRM_DEBUG_DRIVER(CMD: failed to prep read\n);
-   return ret;
+   batch_base = copy_batch(shadow_batch_obj, batch_obj);
+   if (IS_ERR(batch_base)) {
+   DRM_DEBUG_DRIVER(CMD: Failed to copy batch\n);
+   return 

[Intel-gfx] [PATCH v3 1/5] drm/i915: Implement a framework for batch buffer pools

2014-11-03 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described
in the kerneldoc in the patch. The code is simple, but separating
it out makes it easier to change the underlying algorithms and to
extend to future use cases should they arise.

The interface is simple: init to create an empty pool, fini to
clean it up; get to obtain a new buffer, put to return it to the
pool. Note that all buffers must be returned to the pool before
cleaning up the pool.

Buffers are purgeable while in the pool, but not explicitly
truncated in order to avoid overhead during execbuf.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool
for the command parser.

v2:
- s/BUG_ON/WARN_ON/ for locking assertions
- Remove the cap on pool size
- Switch from alloc/free to init/fini

v3:
- Idiomatic looping structure in _fini
- Correct handling of purged objects
- Don't return a buffer that's too much larger than needed

v4:
- Rebased to latest -nightly

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 Documentation/DocBook/drm.tmpl |   5 +
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_drv.h|  17 
 drivers/gpu/drm/i915/i915_gem.c|   1 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 153 +
 5 files changed, 177 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index f6a9d7b..133f4e6 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -3958,6 +3958,11 @@ int num_ioctls;/synopsis
 !Idrivers/gpu/drm/i915/i915_cmd_parser.c
   /sect2
   sect2
+titleBatchbuffer Pools/title
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  /sect2
+  sect2
 titleLogical Rings, Logical Ring Contexts and Execlists/title
 !Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and 
Execlists
 !Idrivers/gpu/drm/i915/intel_lrc.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 891e584..73cd2d7 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6a73803..fbf10cc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1126,6 +1126,12 @@ struct intel_l3_parity {
int which_slice;
 };
 
+struct i915_gem_batch_pool {
+   struct drm_device *dev;
+   struct list_head active_list;
+   struct list_head inactive_list;
+};
+
 struct i915_gem_mm {
/** Memory allocator for GTT stolen memory */
struct drm_mm stolen;
@@ -1797,6 +1803,8 @@ struct drm_i915_gem_object {
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
 
+   struct list_head batch_pool_list;
+
/**
 * This is set if the object is on the active lists (has pending
 * rendering and so a non-zero seqno), and is not set if it i s on
@@ -2758,6 +2766,15 @@ void i915_destroy_error_state(struct drm_device *dev);
 void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
+/* i915_gem_batch_pool.c */
+void i915_gem_batch_pool_init(struct drm_device *dev,
+ struct i915_gem_batch_pool *pool);
+void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
+struct drm_i915_gem_object*
+i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
+void i915_gem_batch_pool_put(struct i915_gem_batch_pool *pool,
+struct drm_i915_gem_object *obj);
+
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(void);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e91978..4dbd7b9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4337,6 +4337,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(obj-ring_list);
INIT_LIST_HEAD(obj-obj_exec_link);
INIT_LIST_HEAD(obj-vma_list);
+   INIT_LIST_HEAD(obj-batch_pool_list);
 
obj-ops = ops;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
new file mode 100644
index 000..6d526fa
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ 

[Intel-gfx] [PATCH 2/2] tests/gem_madvise: set execbuf.batch_len before doing an execbuf

2014-11-03 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The command parser's batch_len optimization causes the parser to
reject this batch as not having an MI_BATCH_BUFFER_END because
the length was not set correctly.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_madvise.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/gem_madvise.c b/tests/gem_madvise.c
index 04a82aa..f95fbda 100644
--- a/tests/gem_madvise.c
+++ b/tests/gem_madvise.c
@@ -131,6 +131,7 @@ dontneed_before_exec(void)
 
execbuf.buffers_ptr = (uintptr_t)exec;
execbuf.buffer_count = 1;
+   execbuf.batch_len = sizeof(buf);
gem_execbuf(fd, execbuf);
 
gem_close(fd, exec.handle);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] tests/gem_exec_parse: fix batch_len setting for cmd-crossing-page

2014-11-03 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The size of the batch buffer passed to the kernel is significantly
larger than the size of the batch buffer passed to the function. A
proposed optimization as part of the batch copy kernel series is to
use batch_len for the copy and parse operations, which leads to a
false "batch without MI_BATCH_BUFFER_END" failure for this test.

To fix this, modify the test to set batch_start_offset and batch_len
such that they define the range of actual commands in the batch,
including a few of the surrounding nops for alignment purposes.

v2: update batch_start_offset as well

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index 1dc9103..e48b83a 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -144,16 +144,18 @@ static void exec_split_batch(int fd, uint32_t *cmds,
struct drm_i915_gem_exec_object2 objs[1];
uint32_t cmd_bo;
uint32_t noop[1024] = { 0 };
+   const int alloc_size = 4096 * 2;
+   const int actual_start_offset = 4096-sizeof(uint32_t);
 
// Allocate and fill a 2-page batch with noops
-   cmd_bo = gem_create(fd, 4096 * 2);
+   cmd_bo = gem_create(fd, alloc_size);
gem_write(fd, cmd_bo, 0, noop, sizeof(noop));
gem_write(fd, cmd_bo, 4096, noop, sizeof(noop));
 
// Write the provided commands such that the first dword
// of the command buffer is the last dword of the first
// page (i.e. the command is split across the two pages).
-   gem_write(fd, cmd_bo, 4096-sizeof(uint32_t), cmds, size);
+   gem_write(fd, cmd_bo, actual_start_offset, cmds, size);
 
objs[0].handle = cmd_bo;
objs[0].relocation_count = 0;
@@ -166,8 +168,14 @@ static void exec_split_batch(int fd, uint32_t *cmds,
 
execbuf.buffers_ptr = (uintptr_t)objs;
execbuf.buffer_count = 1;
-   execbuf.batch_start_offset = 0;
-   execbuf.batch_len = size;
+   // NB: We want batch_start_offset and batch_len to point to the block
+   // of the actual commands (i.e. at the last dword of the first page),
+   // but have to adjust both the start offset and length to meet the
+   // kernel driver's requirements on the alignment of those fields.
+   execbuf.batch_start_offset = actual_start_offset  ~0x7;
+   execbuf.batch_len =
+   ALIGN(size + actual_start_offset - execbuf.batch_start_offset,
+ 0x8);
execbuf.cliprects_ptr = 0;
execbuf.num_cliprects = 0;
execbuf.DR1 = 0;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Abort command parsing for chained batches

2014-10-16 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

libva uses chained batch buffers in a way that the command parser
can't generally handle. Fortunately, libva doesn't need to write
registers from batch buffers in the way that mesa does, so this
patch causes the driver to fall back to non-secure dispatch if
the parser detects a chained batch buffer.

Testcase: igt/gem_exec_parse/chained-batch
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 18 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 24 +---
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 86b3ae0..ef38915 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -138,6 +138,11 @@ static const struct drm_i915_cmd_descriptor common_cmds[] 
= {
.mask = MI_GLOBAL_GTT,
.expected = 0,
  }},  ),
+   /*
+* MI_BATCH_BUFFER_START requires some special handling. It's not
+* really a 'skip' action but it doesn't seem like it's worth adding
+* a new action. See i915_parse_cmds().
+*/
CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
 };
 
@@ -955,7 +960,8 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * Parses the specified batch buffer looking for privilege violations as
  * described in the overview.
  *
- * Return: non-zero if the parser finds violations or otherwise fails
+ * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
+ * if the batch appears legal but should use hardware parsing
  */
 int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
@@ -1002,6 +1008,16 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
break;
}
 
+   /*
+* If the batch buffer contains a chained batch, return an
+* error that tells the caller to abort and dispatch the
+* workload as a non-secure batch.
+*/
+   if (desc-cmd.value == MI_BATCH_BUFFER_START) {
+   ret = -EACCES;
+   break;
+   }
+
if (desc-flags  CMD_DESC_FIXED)
length = desc-length.fixed;
else
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1a0611b..1ed5702 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1368,17 +1368,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
  batch_obj,
  args-batch_start_offset,
  file-is_master);
-   if (ret)
-   goto err;
-
-   /*
-* XXX: Actually do this when enabling batch copy...
-*
-* Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
-* from MI_BATCH_BUFFER_START commands issued in the
-* dispatch_execbuffer implementations. We specifically don't
-* want that set when the command parser is enabled.
-*/
+   if (ret) {
+   if (ret != -EACCES)
+   goto err;
+   } else {
+   /*
+* XXX: Actually do this when enabling batch copy...
+*
+* Set the DISPATCH_SECURE bit to remove the NON_SECURE 
bit
+* from MI_BATCH_BUFFER_START commands issued in the
+* dispatch_execbuffer implementations. We specifically 
don't
+* want that set when the command parser is enabled.
+*/
+   }
}
 
/* snb/ivb/vlv conflate the batch in ppgtt bit with the non-secure
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] tests/gem_exec_parse: test for chained batch buffers

2014-10-15 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

libva makes extensive use of chained batch buffers. The batch
buffer copy portion of the command parser has the potential to
break chained batches, so add a simple test to make sure that
doesn't happen.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 lib/intel_reg.h|   1 +
 tests/gem_exec_parse.c | 105 +
 2 files changed, 106 insertions(+)

diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index f0fc5fd..fcc9d7c 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -2571,6 +2571,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_BATCH_BUFFER_END(0xA  23)
 #define MI_BATCH_NON_SECURE(1)
 #define MI_BATCH_NON_SECURE_I965   (1  8)
+#define MI_BATCH_NON_SECURE_HSW(113) /* Additional bit for 
RCS */
 
 #define MAX_DISPLAY_PIPES  2
 
diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index 568bd4a..3ff6a66 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -183,6 +183,96 @@ static void exec_split_batch(int fd, uint32_t *cmds,
gem_close(fd, cmd_bo);
 }
 
+static void exec_batch_chained(int fd, uint32_t cmd_bo, uint32_t *cmds,
+  int size, int patch_offset,
+  uint64_t expected_value)
+{
+   struct drm_i915_gem_execbuffer2 execbuf;
+   struct drm_i915_gem_exec_object2 objs[3];
+   struct drm_i915_gem_relocation_entry reloc;
+   struct drm_i915_gem_relocation_entry first_level_reloc;
+
+   uint32_t target_bo = gem_create(fd, 4096);
+   uint32_t first_level_bo = gem_create(fd, 4096);
+   uint64_t actual_value = 0;
+
+   static uint32_t first_level_cmds[] = {
+   MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965,
+   0,
+   MI_BATCH_BUFFER_END,
+   0,
+   };
+
+   if (IS_HASWELL(intel_get_drm_devid(fd)))
+   first_level_cmds[0] |= MI_BATCH_NON_SECURE_HSW;
+
+   gem_write(fd, first_level_bo, 0,
+ first_level_cmds, sizeof(first_level_cmds));
+   gem_write(fd, cmd_bo, 0, cmds, size);
+
+   reloc.offset = patch_offset;
+   reloc.delta = 0;
+   reloc.target_handle = target_bo;
+   reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+   reloc.presumed_offset = 0;
+
+   first_level_reloc.offset = 4;
+   first_level_reloc.delta = 0;
+   first_level_reloc.target_handle = cmd_bo;
+   first_level_reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+   first_level_reloc.write_domain = 0;
+   first_level_reloc.presumed_offset = 0;
+
+   objs[0].handle = target_bo;
+   objs[0].relocation_count = 0;
+   objs[0].relocs_ptr = 0;
+   objs[0].alignment = 0;
+   objs[0].offset = 0;
+   objs[0].flags = 0;
+   objs[0].rsvd1 = 0;
+   objs[0].rsvd2 = 0;
+
+   objs[1].handle = cmd_bo;
+   objs[1].relocation_count = 1;
+   objs[1].relocs_ptr = (uintptr_t)reloc;
+   objs[1].alignment = 0;
+   objs[1].offset = 0;
+   objs[1].flags = 0;
+   objs[1].rsvd1 = 0;
+   objs[1].rsvd2 = 0;
+
+   objs[2].handle = first_level_bo;
+   objs[2].relocation_count = 1;
+   objs[2].relocs_ptr = (uintptr_t)first_level_reloc;
+   objs[2].alignment = 0;
+   objs[2].offset = 0;
+   objs[2].flags = 0;
+   objs[2].rsvd1 = 0;
+   objs[2].rsvd2 = 0;
+
+   execbuf.buffers_ptr = (uintptr_t)objs;
+   execbuf.buffer_count = 3;
+   execbuf.batch_start_offset = 0;
+   execbuf.batch_len = sizeof(first_level_cmds);
+   execbuf.cliprects_ptr = 0;
+   execbuf.num_cliprects = 0;
+   execbuf.DR1 = 0;
+   execbuf.DR4 = 0;
+   execbuf.flags = I915_EXEC_RENDER;
+   i915_execbuffer2_set_context_id(execbuf, 0);
+   execbuf.rsvd2 = 0;
+
+   gem_execbuf(fd, execbuf);
+   gem_sync(fd, cmd_bo);
+
+   gem_read(fd,target_bo, 0, actual_value, sizeof(actual_value));
+   igt_assert_eq(expected_value, actual_value);
+
+   gem_close(fd, first_level_bo);
+   gem_close(fd, target_bo);
+}
+
 uint32_t handle;
 int fd;
 
@@ -366,6 +456,21 @@ igt_main
   -EINVAL);
}
 
+   igt_subtest(chained-batch) {
+   uint32_t pc[] = {
+   GFX_OP_PIPE_CONTROL,
+   PIPE_CONTROL_QW_WRITE,
+   0, // To be patched
+   0x1200,
+   0,
+   MI_BATCH_BUFFER_END,
+   };
+   exec_batch_chained(fd, handle,
+  pc, sizeof(pc),
+  8, // patch offset,
+  0x1200);
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.9.1

___
Intel-gfx mailing 

[Intel-gfx] [PATCH 1/2] tests/gem_exec_parse: fix batch_len setting for cmd-crossing-page

2014-10-15 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The size of the batch buffer passed to the kernel is significantly
larger than the size of the batch buffer passed to the function. A
proposed optimization as part of the batch copy kernel series is to
use batch_len for the copy and parse operations, which leads to a
false "batch without MI_BATCH_BUFFER_END" failure for this test.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index 05f271c..568bd4a 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -144,9 +144,10 @@ static void exec_split_batch(int fd, uint32_t *cmds,
struct drm_i915_gem_exec_object2 objs[1];
uint32_t cmd_bo;
uint32_t noop[1024] = { 0 };
+   const int alloc_size = 4096 * 2;
 
// Allocate and fill a 2-page batch with noops
-   cmd_bo = gem_create(fd, 4096 * 2);
+   cmd_bo = gem_create(fd, alloc_size);
gem_write(fd, cmd_bo, 0, noop, sizeof(noop));
gem_write(fd, cmd_bo, 4096, noop, sizeof(noop));
 
@@ -167,7 +168,7 @@ static void exec_split_batch(int fd, uint32_t *cmds,
execbuf.buffers_ptr = (uintptr_t)objs;
execbuf.buffer_count = 1;
execbuf.batch_start_offset = 0;
-   execbuf.batch_len = size;
+   execbuf.batch_len = alloc_size;
execbuf.cliprects_ptr = 0;
execbuf.num_cliprects = 0;
execbuf.DR1 = 0;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Don't leak command parser tables on suspend/resume

2014-09-22 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Ring init and cleanup are not balanced because we re-init the rings on
resume without having cleaned them up on suspend. This leads to the
driver leaking the parser's hash tables with a kmemleak signature such
as this:

unreferenced object 0x880405960980 (size 32):
  comm systemd-udevd, pid 516, jiffies 4294896961 (age 10202.044s)
  hex dump (first 32 bytes):
d0 85 46 c0 ff ff ff ff 00 00 00 00 00 00 00 00  ..F.
98 60 28 04 04 88 ff ff 00 00 00 00 00 00 00 00  .`(.
  backtrace:
[81816f9e] kmemleak_alloc+0x4e/0xb0
[811fa678] kmem_cache_alloc_trace+0x168/0x2f0
[c03e20a5] i915_cmd_parser_init_ring+0x2a5/0x3e0 [i915]
[c04088a2] intel_init_ring_buffer+0x202/0x470 [i915]
[c040c998] intel_init_vebox_ring_buffer+0x1e8/0x2b0 [i915]
[c03eff59] i915_gem_init_hw+0x2f9/0x3a0 [i915]
[c03f0057] i915_gem_init+0x57/0x1d0 [i915]
[c045e26a] i915_driver_load+0xc0a/0x10e0 [i915]
[c02e0d5d] drm_dev_register+0xad/0x100 [drm]
[c02e3b9f] drm_get_pci_dev+0x8f/0x200 [drm]
[c03c934b] i915_pci_probe+0x3b/0x60 [i915]
[81436725] local_pci_probe+0x45/0xa0
[81437a69] pci_device_probe+0xd9/0x130
[81524f4d] driver_probe_device+0x12d/0x3e0
[815252d3] __driver_attach+0x93/0xa0
[81522e1b] bus_for_each_dev+0x6b/0xb0

This patch extends the current convention of checking whether a
resource is already allocated before allocating it during ring init.
Longer term it might make sense to only init the rings once.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83794
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---

According to the report in bugzilla, this happens in linux-next-20140919 as
well. I'm not sure what the path is for getting the fix there in addition to
nightly.

 drivers/gpu/drm/i915/i915_cmd_parser.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 4c35e2a..86b3ae0 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -709,11 +709,13 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs 
*ring)
BUG_ON(!validate_cmds_sorted(ring, cmd_tables, cmd_table_count));
BUG_ON(!validate_regs_sorted(ring));
 
-   ret = init_hash_table(ring, cmd_tables, cmd_table_count);
-   if (ret) {
-   DRM_ERROR(CMD: cmd_parser_init failed!\n);
-   fini_hash_table(ring);
-   return ret;
+   if (hash_empty(ring-cmd_hash)) {
+   ret = init_hash_table(ring, cmd_tables, cmd_table_count);
+   if (ret) {
+   DRM_ERROR(CMD: cmd_parser_init failed!\n);
+   fini_hash_table(ring);
+   return ret;
+   }
}
 
ring-needs_cmd_parser = true;
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915: Log a message when rejecting LRM to OACONTROL

2014-09-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The other paths in the command parser that reject a batch all
log a message indicating the reason. We simply missed this one.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index fb24dae..e1e7d37 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -886,8 +886,10 @@ static bool check_cmd(const struct intel_engine_cs *ring,
 * OACONTROL writes to only MI_LOAD_REGISTER_IMM commands.
 */
if (reg_addr == OACONTROL) {
-   if (desc-cmd.value == MI_LOAD_REGISTER_MEM)
+   if (desc-cmd.value == MI_LOAD_REGISTER_MEM) {
+   DRM_DEBUG_DRIVER(CMD: Rejected LRM to 
OACONTROL\n);
return false;
+   }
 
if (desc-cmd.value == MI_LOAD_REGISTER_IMM(1))
*oacontrol_set = (cmd[2] != 0);
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915: Re-enable the command parser when using PPGTT

2014-09-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

In commit

commit 896ab1a5d54269b463a24194c2e4a369103b46d8
Author: Daniel Vetter daniel.vet...@ffwll.ch
Date:   Wed Aug 6 15:04:51 2014 +0200

drm/i915: Fix up checks for aliasing ppgtt

it looks like we accidentally inverted the check that the command
parser should only run when the driver enables some form of PPGTT.

Testcase: igt/gem_exec_parse
Cc: Daniel Vetter daniel.vet...@ffwll.ch
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---

At this point, I believe all platforms that use the command parser should
have at least aliasing PPGTT enabled, so if you can confirm that, feel free
to delete the comment about VLV and make this check stricter - maybe a
WARN_ON - when applying the patch.

 drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index c45856b..fb24dae 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -850,7 +850,7 @@ bool i915_needs_cmd_parser(struct intel_engine_cs *ring)
 * disabled. That will cause all of the parser's PPGTT checks to
 * fail. For now, disable parsing when PPGTT is off.
 */
-   if (USES_PPGTT(ring-dev))
+   if (!USES_PPGTT(ring-dev))
return false;
 
return (i915.enable_cmd_parser == 1);
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 1/5] drm/i915: Implement a framework for batch buffer pools

2014-07-09 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described
in the kerneldoc in the patch. The code is simple, but separating
it out makes it easier to change the underlying algorithms and to
extend to future use cases should they arise.

The interface is simple: init to create an empty pool, fini to
clean it up; get to obtain a new buffer, put to return it to the
pool. Note that all buffers must be returned to the pool before
freeing it.

Buffers are purgeable while in the pool, but not explicitly
truncated in order to avoid overhead during execbuf.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool
for the command parser.

v2:
- s/BUG_ON/WARN_ON/ for locking assertions
- Remove the cap on pool size
- Switch from alloc/free to init/fini

v3:
- Idiomatic looping structure in _fini
- Correct handling of purged objects
- Don't return a buffer that's too much larger than needed

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 Documentation/DocBook/drm.tmpl |   5 +
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_drv.h|  17 
 drivers/gpu/drm/i915/i915_gem.c|   1 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 153 +
 5 files changed, 177 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 4890d94..2749555 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -3945,6 +3945,11 @@ int num_ioctls;/synopsis
 !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser
 !Idrivers/gpu/drm/i915/i915_cmd_parser.c
   /sect2
+  sect2
+titleBatchbuffer Pools/title
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  /sect2
 /sect1
   /chapter
 /part
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index cad1683..b92fbe6 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -17,6 +17,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 90216bb..a478a96 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1062,6 +1062,12 @@ struct intel_l3_parity {
int which_slice;
 };
 
+struct i915_gem_batch_pool {
+   struct drm_device *dev;
+   struct list_head active_list;
+   struct list_head inactive_list;
+};
+
 struct i915_gem_mm {
/** Memory allocator for GTT stolen memory */
struct drm_mm stolen;
@@ -1690,6 +1696,8 @@ struct drm_i915_gem_object {
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
 
+   struct list_head batch_pool_list;
+
/**
 * This is set if the object is on the active lists (has pending
 * rendering and so a non-zero seqno), and is not set if it i s on
@@ -2594,6 +2602,15 @@ void i915_destroy_error_state(struct drm_device *dev);
 void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(int type);
 
+/* i915_gem_batch_pool.c */
+void i915_gem_batch_pool_init(struct drm_device *dev,
+ struct i915_gem_batch_pool *pool);
+void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
+struct drm_i915_gem_object*
+i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
+void i915_gem_batch_pool_put(struct i915_gem_batch_pool *pool,
+struct drm_i915_gem_object *obj);
+
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(void);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e5d4d73..89a4ec0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4332,6 +4332,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(obj-ring_list);
INIT_LIST_HEAD(obj-obj_exec_link);
INIT_LIST_HEAD(obj-vma_list);
+   INIT_LIST_HEAD(obj-batch_pool_list);
 
obj-ops = ops;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
new file mode 100644
index 000..6d526fa
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated 

[Intel-gfx] [PATCH 6/5] drm/i915: Add batch pool details to i915_gem_objects debugfs

2014-07-09 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Add batch pool statistics to the i915_gem_objects debugfs output to better
account for the potentially large memory consumption of the batch pool.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 45 +
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 696eb98..d4ec4ec 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -360,6 +360,38 @@ static int per_file_stats(int id, void *ptr, void *data)
return 0;
 }
 
+#define print_file_stats(m, name, stats) \
+   seq_printf(m, %s: %u objects, %zu bytes (%zu active, %zu inactive, %zu 
global, %zu shared, %zu unbound)\n, \
+  name, \
+  stats.count, \
+  stats.total, \
+  stats.active, \
+  stats.inactive, \
+  stats.global, \
+  stats.shared, \
+  stats.unbound)
+
+static void print_batch_pool_stats(struct seq_file *m,
+  struct drm_i915_private *dev_priv)
+{
+   struct drm_i915_gem_object *obj;
+   struct file_stats stats;
+
+   memset(stats, 0, sizeof(stats));
+
+   list_for_each_entry(obj,
+   dev_priv-mm.batch_pool.active_list,
+   batch_pool_list)
+   per_file_stats(0, obj, stats);
+
+   list_for_each_entry(obj,
+   dev_priv-mm.batch_pool.inactive_list,
+   batch_pool_list)
+   per_file_stats(0, obj, stats);
+
+   print_file_stats(m, batch pool, stats);
+}
+
 #define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
size += i915_gem_obj_ggtt_size(vma-obj); \
@@ -442,6 +474,9 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
   dev_priv-gtt.mappable_end - dev_priv-gtt.base.start);
 
seq_putc(m, '\n');
+   print_batch_pool_stats(m, dev_priv);
+
+   seq_putc(m, '\n');
list_for_each_entry_reverse(file, dev-filelist, lhead) {
struct file_stats stats;
struct task_struct *task;
@@ -459,15 +494,7 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
 */
rcu_read_lock();
task = pid_task(file-pid, PIDTYPE_PID);
-   seq_printf(m, %s: %u objects, %zu bytes (%zu active, %zu 
inactive, %zu global, %zu shared, %zu unbound)\n,
-  task ? task-comm : unknown,
-  stats.count,
-  stats.total,
-  stats.active,
-  stats.inactive,
-  stats.global,
-  stats.shared,
-  stats.unbound);
+   print_file_stats(m, task ? task-comm : unknown, stats);
rcu_read_unlock();
}
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 4/5] drm/i915: Dispatch the shadow batch buffer

2014-07-08 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This is useful for testing the batch pool code with aliasing PPGTT.
It doesn't work with full PPGTT though; the GPU hangs and the whole
UI is corrupted. We need fixes for the secure dispatch path to
enable this for real.

v2: rebase on shadow_batch_obj replacing batch_obj

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4c4bd66..908cf48 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1401,13 +1401,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
batch_obj = shadow_batch_obj;
 
/*
-* XXX: Actually do this when enabling batch copy...
-*
 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
 * from MI_BATCH_BUFFER_START commands issued in the
 * dispatch_execbuffer implementations. We specifically don't
 * want that set when the command parser is enabled.
 */
+   flags |= I915_DISPATCH_SECURE;
}
 
/* snb/ivb/vlv conflate the batch in ppgtt bit with the non-secure
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 0/5] Command parser batch buffer copy

2014-07-08 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This is v2 of the series I sent here:
http://lists.freedesktop.org/archives/intel-gfx/2014-June/047609.html

I believe that I've addressed all of the feedback except
* I didn't move the allocation of the shadow batch buffer into parse_cmds(). It
  didn't seem like it added much value and would maybe complicate the error
  handling in do_execbuffer().
* I kept the part about attaching the shadow batch to the request, though in
  perhaps a less invasive way. My concern here is that, with the scheduler
  possibly reordering requests, I don't know if we'd still be able to implement
  the busy tracking in the pool as suggested.

The commit message for patch 4 still applies: we aren't ready for that change
until the secure dispatch regression is resolved, but it's needed for testing.

I've added patch 5 to use batch_len instead of object size, as an optimization.
My testing didn't show any perf difference, but I don't have any libva
benchmarks to run, and that's where it sounded like the issue would be. I just
tacked the patch onto the end of the series rather than squashing it in so we
can easily take it or leave it as desired.

Brad Volkin (5):
  drm/i915: Implement a framework for batch buffer pools
  drm/i915: Use batch pools with the command parser
  drm/i915: Add a batch pool debugfs file
  drm/i915: Dispatch the shadow batch buffer
  drm/i915: Use batch length instead of object size in command parser

 Documentation/DocBook/drm.tmpl |   5 ++
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_cmd_parser.c |  88 +++
 drivers/gpu/drm/i915/i915_debugfs.c|  41 +
 drivers/gpu/drm/i915/i915_dma.c|   1 +
 drivers/gpu/drm/i915/i915_drv.h|  26 ++
 drivers/gpu/drm/i915/i915_gem.c|  10 +++
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 133 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  39 -
 9 files changed, 325 insertions(+), 19 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 1/5] drm/i915: Implement a framework for batch buffer pools

2014-07-08 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described
in the kerneldoc in the patch. The code is simple, but separating
it out makes it easier to change the underlying algorithms and to
extend to future use cases should they arise.

The interface is simple: init to create an empty pool, fini to
clean it up; get to obtain a new buffer, put to return it to the
pool. Note that all buffers must be returned to the pool before
freeing it.

Buffers are purgeable while in the pool, but not explicitly
truncated in order to avoid overhead during execbuf.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool
for the command parser.

v2:
- s/BUG_ON/WARN_ON/ for locking assertions
- Remove the cap on pool size
- Switch from alloc/free to init/fini

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 Documentation/DocBook/drm.tmpl |   5 ++
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_drv.h|  17 
 drivers/gpu/drm/i915/i915_gem.c|   1 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 133 +
 5 files changed, 157 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 4890d94..2749555 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -3945,6 +3945,11 @@ int num_ioctls;/synopsis
 !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser
 !Idrivers/gpu/drm/i915/i915_cmd_parser.c
   /sect2
+  sect2
+titleBatchbuffer Pools/title
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  /sect2
 /sect1
   /chapter
 /part
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index cad1683..b92fbe6 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -17,6 +17,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 90216bb..a478a96 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1062,6 +1062,12 @@ struct intel_l3_parity {
int which_slice;
 };
 
+struct i915_gem_batch_pool {
+   struct drm_device *dev;
+   struct list_head active_list;
+   struct list_head inactive_list;
+};
+
 struct i915_gem_mm {
/** Memory allocator for GTT stolen memory */
struct drm_mm stolen;
@@ -1690,6 +1696,8 @@ struct drm_i915_gem_object {
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
 
+   struct list_head batch_pool_list;
+
/**
 * This is set if the object is on the active lists (has pending
 * rendering and so a non-zero seqno), and is not set if it i s on
@@ -2594,6 +2602,15 @@ void i915_destroy_error_state(struct drm_device *dev);
 void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(int type);
 
+/* i915_gem_batch_pool.c */
+void i915_gem_batch_pool_init(struct drm_device *dev,
+ struct i915_gem_batch_pool *pool);
+void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
+struct drm_i915_gem_object*
+i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
+void i915_gem_batch_pool_put(struct i915_gem_batch_pool *pool,
+struct drm_i915_gem_object *obj);
+
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(void);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e5d4d73..89a4ec0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4332,6 +4332,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(obj-ring_list);
INIT_LIST_HEAD(obj-obj_exec_link);
INIT_LIST_HEAD(obj-vma_list);
+   INIT_LIST_HEAD(obj-batch_pool_list);
 
obj-ops = ops;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
new file mode 100644
index 000..542477f
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, 

[Intel-gfx] [PATCH v2 2/5] drm/i915: Use batch pools with the command parser

2014-07-08 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This patch sets up all of the tracking and copying necessary to
use batch pools with the command parser, but does not actually
dispatch the copied (shadow) batch to the hardware yet. We still
aren't quite ready to set the secure bit during dispatch.

Note that performance takes a hit from the copy in some cases
and will likely need some work. At a rough pass, the memcpy
appears to be the bottleneck. Without having done a deeper
analysis, two ideas that come to mind are:
1) Copy sections of the batch at a time, as they are reached
   by parsing. Might improve cache locality.
2) Copy only up to the userspace-supplied batch length and
   memset the rest of the buffer. Reduces the number of reads.

v2:
- Remove setting the capacity of the pool
- One global pool instead of per-ring pools
- Replace batch_obj with shadow_batch_obj and hook into eb-vmas
- Memset any space in the shadow batch beyond what gets copied
- Rebased on execlist prep refactoring

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 84 --
 drivers/gpu/drm/i915/i915_dma.c|  1 +
 drivers/gpu/drm/i915/i915_drv.h|  8 +++
 drivers/gpu/drm/i915/i915_gem.c|  9 
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 35 +
 5 files changed, 121 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index dea99d9..18788df 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -831,6 +831,56 @@ finish:
return (u32*)addr;
 }
 
+/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
+static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
+  struct drm_i915_gem_object *src_obj)
+{
+   int ret = 0;
+   int needs_clflush = 0;
+   u32 *src_addr, *dest_addr = NULL;
+
+   ret = i915_gem_obj_prepare_shmem_read(src_obj, needs_clflush);
+   if (ret) {
+   DRM_DEBUG_DRIVER(CMD: failed to prep read\n);
+   return ERR_PTR(ret);
+   }
+
+   src_addr = vmap_batch(src_obj);
+   if (!src_addr) {
+   DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n);
+   ret = -ENOMEM;
+   goto unpin_src;
+   }
+
+   if (needs_clflush)
+   drm_clflush_virt_range((char *)src_addr, src_obj-base.size);
+
+   ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
+   if (ret) {
+   DRM_DEBUG_DRIVER(CMD: Failed to set batch CPU domain\n);
+   goto unmap_src;
+   }
+
+   dest_addr = vmap_batch(dest_obj);
+   if (!dest_addr) {
+   DRM_DEBUG_DRIVER(CMD: Failed to vmap shadow batch\n);
+   ret = -ENOMEM;
+   goto unmap_src;
+   }
+
+   memcpy(dest_addr, src_addr, src_obj-base.size);
+   if (dest_obj-base.size  src_obj-base.size)
+   memset((u8 *)dest_addr + src_obj-base.size, 0,
+  dest_obj-base.size - src_obj-base.size);
+
+unmap_src:
+   vunmap(src_addr);
+unpin_src:
+   i915_gem_object_unpin_pages(src_obj);
+
+   return ret ? ERR_PTR(ret) : dest_addr;
+}
+
 /**
  * i915_needs_cmd_parser() - should a given ring use software command parsing?
  * @ring: the ring in question
@@ -952,6 +1002,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
  * @ring: the ring on which the batch is to execute
  * @batch_obj: the batch buffer in question
+ * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
  * @is_master: is the submitting process the drm master?
  *
@@ -962,32 +1013,28 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  */
 int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
+   struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
bool is_master)
 {
int ret = 0;
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
-   int needs_clflush = 0;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   ret = i915_gem_obj_prepare_shmem_read(batch_obj, needs_clflush);
-   if (ret) {
-   DRM_DEBUG_DRIVER(CMD: failed to prep read\n);
-   return ret;
+   batch_base = copy_batch(shadow_batch_obj, batch_obj);
+   if (IS_ERR(batch_base)) {
+   DRM_DEBUG_DRIVER(CMD: Failed to copy batch\n);
+   return PTR_ERR(batch_base);
}
 
-   batch_base = vmap_batch(batch_obj);
-   if (!batch_base) {
-   DRM_DEBUG_DRIVER(CMD: Failed to vmap batch\n);
-   

[Intel-gfx] [PATCH v2 5/5] drm/i915: Use batch length instead of object size in command parser

2014-07-08 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Previously we couldn't trust the user-supplied batch length because
it came directly from userspace (i.e. untrusted code). It would have
affected what commands software parsed without regard to what hardware
would actually execute, leaving a potential hole.

With the parser now copying the user supplied batch buffer and writing
MI_NOP commands to any space after the copied region, we can safely use
the batch length input. This should be a performance win as the actual
batch length is frequently much smaller than the allocated object size.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 20 +++-
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  1 +
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 18788df..2470d3b 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -833,7 +833,8 @@ finish:
 
 /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
 static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
-  struct drm_i915_gem_object *src_obj)
+  struct drm_i915_gem_object *src_obj,
+  u32 batch_len)
 {
int ret = 0;
int needs_clflush = 0;
@@ -853,7 +854,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
}
 
if (needs_clflush)
-   drm_clflush_virt_range((char *)src_addr, src_obj-base.size);
+   drm_clflush_virt_range((char *)src_addr, batch_len);
 
ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
if (ret) {
@@ -868,10 +869,10 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
goto unmap_src;
}
 
-   memcpy(dest_addr, src_addr, src_obj-base.size);
-   if (dest_obj-base.size  src_obj-base.size)
-   memset((u8 *)dest_addr + src_obj-base.size, 0,
-  dest_obj-base.size - src_obj-base.size);
+   memcpy(dest_addr, src_addr, batch_len);
+   if (dest_obj-base.size  batch_len)
+   memset((u8 *)dest_addr + batch_len, 0,
+  dest_obj-base.size - batch_len);
 
 unmap_src:
vunmap(src_addr);
@@ -1015,6 +1016,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
+   u32 batch_len,
bool is_master)
 {
int ret = 0;
@@ -1022,7 +1024,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   batch_base = copy_batch(shadow_batch_obj, batch_obj);
+   batch_base = copy_batch(shadow_batch_obj, batch_obj, batch_len);
if (IS_ERR(batch_base)) {
DRM_DEBUG_DRIVER(CMD: Failed to copy batch\n);
return PTR_ERR(batch_base);
@@ -1031,11 +1033,11 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
cmd = batch_base + (batch_start_offset / sizeof(*cmd));
 
/*
-* We use the source object's size because the shadow object is as
+* We use the batch length as size because the shadow object is as
 * large or larger and copy_batch() will write MI_NOPs to the extra
 * space. Parsing should be faster in some cases this way.
 */
-   batch_end = cmd + (batch_obj-base.size / sizeof(*batch_end));
+   batch_end = cmd + (batch_len / sizeof(*batch_end));
 
while (cmd  batch_end) {
const struct drm_i915_cmd_descriptor *desc;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a6b903d..49bcf79 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2627,6 +2627,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
+   u32 batch_len,
bool is_master);
 
 /* i915_suspend.c */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 908cf48..69ce030 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1388,6 +1388,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
  batch_obj,
  shadow_batch_obj,
  args-batch_start_offset,
+ args-batch_len,

[Intel-gfx] [PATCH v2 3/5] drm/i915: Add a batch pool debugfs file

2014-07-08 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

It provides some useful information about the buffers in
the global command parser batch pool.

v2: rebase on global pool instead of per-ring pools

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 41 +
 1 file changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index b3b56c4..696eb98 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -568,6 +568,46 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
return 0;
 }
 
+static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = m->private;
+   struct drm_device *dev = node->minor->dev;
+   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct drm_i915_gem_object *obj;
+   int count = 0;
+   int ret;
+
+   ret = mutex_lock_interruptible(&dev->struct_mutex);
+   if (ret)
+   return ret;
+
+   seq_puts(m, "active:\n");
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.active_list,
+   batch_pool_list) {
+   seq_puts(m, "   ");
+   describe_obj(m, obj);
+   seq_putc(m, '\n');
+   count++;
+   }
+
+   seq_puts(m, "inactive:\n");
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.inactive_list,
+   batch_pool_list) {
+   seq_puts(m, "   ");
+   describe_obj(m, obj);
+   seq_putc(m, '\n');
+   count++;
+   }
+
+   seq_printf(m, "total: %d\n", count);
+
+   mutex_unlock(&dev->struct_mutex);
+
+   return 0;
+}
+
 static int i915_gem_request_info(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m->private;
@@ -3950,6 +3990,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_gem_hws_blt", i915_hws_info, 0, (void *)BCS},
{"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS},
{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
+   {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
{"i915_rstdby_delays", i915_rstdby_delays, 0},
{"i915_frequency_info", i915_frequency_info, 0},
{"i915_delayfreq_table", i915_delayfreq_table, 0},
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 1/4] drm/i915: Implement a framework for batch buffer pools

2014-06-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described
in the kerneldoc in the patch. The code is simple, but separating
it out makes it easier to change the underlying algorithms and to
extend to future use cases should they arise.

The interface is simple: alloc to create an empty pool, free to
clean it up; get to obtain a new buffer, put to return it to the
pool. Note that all buffers must be returned to the pool before
freeing it.

The pool has a maximum number of buffers allowed due to some tests
(e.g. gem_exec_nop) creating a very large number of buffers (e.g.
___). Buffers are purgeable while in the pool, but not explicitly
truncated in order to avoid overhead during execbuf.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool
for the command parser.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---

r.e. pool capacity
My original testing showed something like thousands of buffers in
the pool after a gem_exec_nop run. But when I reran with the max
check disabled just now to get an actual number for the commit
message, the number was more like 130.  I developed and tested the
changes incrementally, and suspect that the original run was before
I implemented the actual copy operation. So I'm inclined to remove
or at least increase the cap in the final version. Thoughts?

---
 Documentation/DocBook/drm.tmpl |   5 +
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_drv.h|  19 
 drivers/gpu/drm/i915/i915_gem.c|   1 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 151 +
 5 files changed, 177 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 7df3134..fcc0a1c 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -3939,6 +3939,11 @@ int num_ioctls;</synopsis>
 !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser
 !Idrivers/gpu/drm/i915/i915_cmd_parser.c
   </sect2>
+  <sect2>
+<title>Batchbuffer Pools</title>
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  </sect2>
 </sect1>
   </chapter>
 </part>
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index cad1683..b92fbe6 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -17,6 +17,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0640071..2a88b5e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1610,6 +1610,8 @@ struct drm_i915_gem_object {
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
 
+   struct list_head batch_pool_list;
+
/**
 * This is set if the object is on the active lists (has pending
 * rendering and so a non-zero seqno), and is not set if it i s on
@@ -1727,6 +1729,14 @@ struct drm_i915_gem_object {
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
+struct i915_gem_batch_pool {
+   struct drm_device *dev;
+   struct list_head active_list;
+   struct list_head inactive_list;
+   int count;
+   int max_count;
+};
+
 /**
  * Request queue structure.
  *
@@ -2508,6 +2518,15 @@ void i915_destroy_error_state(struct drm_device *dev);
 void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(int type);
 
+/* i915_gem_batch_pool.c */
+struct i915_gem_batch_pool *i915_gem_batch_pool_alloc(struct drm_device *dev,
+ int max_count);
+void i915_gem_batch_pool_free(struct i915_gem_batch_pool *pool);
+struct drm_i915_gem_object*
+i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
+void i915_gem_batch_pool_put(struct i915_gem_batch_pool *pool,
+struct drm_i915_gem_object *obj);
+
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(void);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d857f58..d5e3001 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4324,6 +4324,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->ring_list);
INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
+   INIT_LIST_HEAD(&obj->batch_pool_list);
 

[Intel-gfx] [RFC 2/4] drm/i915: Use batch pools with the command parser

2014-06-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This patch sets up all of the tracking and copying necessary to
use batch pools with the command parser, but does not actually
dispatch the copied (shadow) batch to the hardware yet. We still
aren't quite ready to set the secure bit during dispatch.

Note that performance takes a hit from the copy in some cases
and will likely need some work. At a rough pass, the memcpy
appears to be the bottleneck. Without having done a deeper
analysis, two ideas that come to mind are:
1) Copy sections of the batch at a time, as they are reached
   by parsing. Might improve cache locality.
2) Copy only up to the userspace-supplied batch length and
   memset the rest of the buffer. Reduces the number of reads.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c   | 75 ++--
 drivers/gpu/drm/i915/i915_drv.h  |  7 ++-
 drivers/gpu/drm/i915/i915_gem.c  |  8 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   | 45 +++--
 drivers/gpu/drm/i915/i915_gem_render_state.c |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 12 +
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  7 +++
 7 files changed, 134 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index dea99d9..669afb0 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -831,6 +831,53 @@ finish:
return (u32*)addr;
 }
 
+/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
+static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
+  struct drm_i915_gem_object *src_obj)
+{
+   int ret = 0;
+   int needs_clflush = 0;
+   u32 *src_addr, *dest_addr = NULL;
+
+   ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: failed to prep read\n");
+   return ERR_PTR(ret);
+   }
+
+   src_addr = vmap_batch(src_obj);
+   if (!src_addr) {
+   DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
+   ret = -ENOMEM;
+   goto unpin_src;
+   }
+
+   if (needs_clflush)
+   drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
+
+   ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: Failed to set batch CPU domain\n");
+   goto unmap_src;
+   }
+
+   dest_addr = vmap_batch(dest_obj);
+   if (!dest_addr) {
+   DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
+   ret = -ENOMEM;
+   goto unmap_src;
+   }
+
+   memcpy(dest_addr, src_addr, src_obj->base.size);
+
+unmap_src:
+   vunmap(src_addr);
+unpin_src:
+   i915_gem_object_unpin_pages(src_obj);
+
+   return ret ? ERR_PTR(ret) : dest_addr;
+}
+
 /**
  * i915_needs_cmd_parser() - should a given ring use software command parsing?
  * @ring: the ring in question
@@ -952,6 +999,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
  * @ring: the ring on which the batch is to execute
  * @batch_obj: the batch buffer in question
+ * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
  * @is_master: is the submitting process the drm master?
  *
@@ -962,31 +1010,21 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  */
 int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
+   struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
bool is_master)
 {
int ret = 0;
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
-   int needs_clflush = 0;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush);
-   if (ret) {
-   DRM_DEBUG_DRIVER("CMD: failed to prep read\n");
-   return ret;
-   }
-
-   batch_base = vmap_batch(batch_obj);
-   if (!batch_base) {
-   DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
-   i915_gem_object_unpin_pages(batch_obj);
-   return -ENOMEM;
+   batch_base = copy_batch(shadow_batch_obj, batch_obj);
+   if (IS_ERR(batch_base)) {
+   DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
+   return PTR_ERR(batch_base);
}
 
-   if (needs_clflush)
-   drm_clflush_virt_range((char *)batch_base, 
batch_obj->base.size);
-
cmd = batch_base + (batch_start_offset / sizeof(*cmd));
batch_end = cmd + (batch_obj->base.size / 

[Intel-gfx] [RFC 4/4] drm/i915: Dispatch the shadow batch buffer

2014-06-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This is useful for testing the batch pool code with aliasing PPGTT.
It doesn't work with full PPGTT though; the GPU hangs and the whole
UI is corrupted. We need fixes for the secure dispatch path to
enable this for real.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 37 --
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0b263aa..981f66b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1321,31 +1321,34 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
goto err;
 
/*
-* XXX: Actually do this when enabling batch copy...
-*
 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
 * from MI_BATCH_BUFFER_START commands issued in the
 * dispatch_execbuffer implementations. We specifically don't
 * want that set when the command parser is enabled.
 */
+   flags |= I915_DISPATCH_SECURE;
}
 
-   /* snb/ivb/vlv conflate the batch in ppgtt bit with the non-secure
-* batch bit. Hence we need to pin secure batches into the global gtt.
-* hsw should have this fixed, but bdw mucks it up again. */
-   if (flags & I915_DISPATCH_SECURE &&
-   !batch_obj->has_global_gtt_mapping) {
-   /* When we have multiple VMs, we'll need to make sure that we
-* allocate space first */
-   struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
-   BUG_ON(!vma);
-   vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
-   }
+   if (!shadow_batch_obj) {
+   /* snb/ivb/vlv conflate the batch in ppgtt bit with the 
non-secure
+* batch bit. Hence we need to pin secure batches into the 
global gtt.
+* hsw should have this fixed, but bdw mucks it up again. */
+   if (flags & I915_DISPATCH_SECURE &&
+   !batch_obj->has_global_gtt_mapping) {
+   /* When we have multiple VMs, we'll need to make sure 
that we
+* allocate space first */
+   struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
+   BUG_ON(!vma);
+   vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
+   }
 
-   if (flags & I915_DISPATCH_SECURE)
-   exec_start += i915_gem_obj_ggtt_offset(batch_obj);
-   else
-   exec_start += i915_gem_obj_offset(batch_obj, vm);
+   if (flags & I915_DISPATCH_SECURE)
+   exec_start += i915_gem_obj_ggtt_offset(batch_obj);
+   else
+   exec_start += i915_gem_obj_offset(batch_obj, vm);
+   } else {
+   exec_start += i915_gem_obj_ggtt_offset(shadow_batch_obj);
+   }
 
ret = i915_gem_execbuffer_move_to_gpu(ring, eb-vmas);
if (ret)
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Add some L3 registers to the parser whitelist

2014-06-17 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Beignet needs these in order to program the L3 cache config for
OpenCL workloads, particularly when using SLM.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 3 +++
 drivers/gpu/drm/i915/i915_reg.h| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 9d79543..dea99d9 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -426,6 +426,9 @@ static const u32 gen7_render_regs[] = {
GEN7_SO_WRITE_OFFSET(1),
GEN7_SO_WRITE_OFFSET(2),
GEN7_SO_WRITE_OFFSET(3),
+   GEN7_L3SQCREG1,
+   GEN7_L3CNTLREG2,
+   GEN7_L3CNTLREG3,
 };
 
 static const u32 gen7_blt_regs[] = {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e1fb0f2..3488567 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4670,6 +4670,8 @@ enum punit_power_well {
 #define GEN7_L3CNTLREG1 0xB01C
 #define  GEN7_WA_FOR_GEN7_L3_CONTROL   0x3C47FF8C
 #define  GEN7_L3AGDIS  (1<<19)
+#define GEN7_L3CNTLREG2 0xB020
+#define GEN7_L3CNTLREG3 0xB024
 
 #define GEN7_L3_CHICKEN_MODE_REGISTER  0xB030
 #define  GEN7_WA_L3_CHICKEN_MODE   0x20000000
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Only check PPGTT bits when using PPGTT

2014-05-28 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This extends use of the command parser to VLV.

Note that the patch checks that the PPGTT bit is set appropriately when
PPGTT is enabled but ignores it when PPGTT is disabled. It would be
awkward to correctly invert the expected value to check that the bit is
set appropriately in that case, and of limited value anyhow.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---

I've confirmed that the shmem pread setup stuff we added does fix the
caching issues I saw previously. I've done some basic testing with this
on both IVB and VLV and don't see regressions. I don't have any data on
the VLV perf impact though.

Also, I considered splitting the patch up a bit differently but decided
that a single patch seemed ok. I'm happy to split it up a bit if that's
what people prefer.

 drivers/gpu/drm/i915/i915_cmd_parser.c | 187 +
 drivers/gpu/drm/i915/i915_drv.h|   8 +-
 2 files changed, 104 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 9d79543..fd35900 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -110,6 +110,7 @@
 #define W CMD_DESC_REGISTER
 #define B CMD_DESC_BITMASK
 #define M CMD_DESC_MASTER
+#define P CMD_DESC_PPGTT
 
 /*Command  Mask   Fixed Len   Action
  -- */
@@ -124,20 +125,20 @@ static const struct drm_i915_cmd_descriptor common_cmds[] 
= {
CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   R  ),
CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   W,
  .reg = { .offset = 1, .mask = 0x007FFFFC }   ),
-   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   W | B,
+   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   W | P,
  .reg = { .offset = 1, .mask = 0x007FFFFC },
- .bits = {{
+ .ppgtt = {
.offset = 0,
.mask = MI_GLOBAL_GTT,
.expected = 0,
- }},  ),
-   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   W | B,
+ },   ),
+   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   W | P,
  .reg = { .offset = 1, .mask = 0x007FFFFC },
- .bits = {{
+ .ppgtt = {
.offset = 0,
.mask = MI_GLOBAL_GTT,
.expected = 0,
- }},  ),
+ },   ),
CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
 };
 
@@ -149,31 +150,31 @@ static const struct drm_i915_cmd_descriptor render_cmds[] 
= {
CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   R  ),
CMD(  MI_URB_CLEAR, SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3F,   B,
- .bits = {{
+   CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3F,   P,
+ .ppgtt = {
.offset = 0,
.mask = MI_GLOBAL_GTT,
.expected = 0,
- }},  ),
+ },   ),
CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  B,
- .bits = {{
+   CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  P,
+ .ppgtt = {
.offset = 0,
.mask = MI_GLOBAL_GTT,
.expected = 0,
- }},  ),
-   CMD(  MI_REPORT_PERF_COUNT, SMI,   !F,  0x3F,   B,
- .bits = {{
+ },   ),
+   CMD(  MI_REPORT_PERF_COUNT, SMI,   !F,  0x3F,   P,
+ .ppgtt = {
.offset = 1,
.mask = MI_REPORT_PERF_COUNT_GGTT,
.expected = 0,
- }},  ),
-   CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
- .bits = {{
+ },   ),
+   CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   P,
+ .ppgtt = {
.offset = 0,
.mask = 

[Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser

2014-05-10 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

For clients that submit large batch buffers the command parser has
a substantial impact on performance. On my HSW ULT system performance
drops as much as ~20% on some tests. Most of the time is spent in the
command lookup code. Converting that from the current naive search to
a hash table lookup reduces the performance drop to ~10%.

The choice of value for I915_CMD_HASH_ORDER allows all commands
currently used in the parser tables to hash to their own bucket (except
for one collision on the render ring). The tradeoff is that it wastes
memory. Because the opcodes for the commands in the tables are not
particularly well distributed, reducing the order still leaves many
buckets empty. The increased collisions don't seem to have a huge
impact on the performance gain, but for now anyhow, the parser trades
memory for performance.

NB: Ville noticed that the error paths through the ring init code
will leak memory. I've not addressed that here. We can do a follow
up pass to handle all of the leaks.

v2: improved comment describing selection of hash key mask (Damien)
replace a BUG_ON() with an error return (Tvrtko, Ville)
commit message improvements

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c  | 158 +---
 drivers/gpu/drm/i915/i915_drv.h |   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |   6 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  11 ++-
 4 files changed, 140 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 69d34e4..d3a5b74 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
 }
 
-static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
+static bool validate_cmds_sorted(struct intel_ring_buffer *ring,
+const struct drm_i915_cmd_table *cmd_tables,
+int cmd_table_count)
 {
int i;
bool ret = true;
 
-   if (!ring->cmd_tables || ring->cmd_table_count == 0)
+   if (!cmd_tables || cmd_table_count == 0)
return true;
 
-   for (i = 0; i < ring->cmd_table_count; i++) {
-   const struct drm_i915_cmd_table *table = &ring->cmd_tables[i];
+   for (i = 0; i < cmd_table_count; i++) {
+   const struct drm_i915_cmd_table *table = &cmd_tables[i];
u32 previous = 0;
int j;
 
@@ -557,6 +559,68 @@ static bool validate_regs_sorted(struct intel_ring_buffer 
*ring)
 ring-master_reg_count);
 }
 
+struct cmd_node {
+   const struct drm_i915_cmd_descriptor *desc;
+   struct hlist_node node;
+};
+
+/*
+ * Different command ranges have different numbers of bits for the opcode. For
+ * example, MI commands use bits 31:23 while 3D commands use bits 31:16. The
+ * problem is that, for example, MI commands use bits 22:16 for other fields
+ * such as GGTT vs PPGTT bits. If we include those bits in the mask then when
+ * we mask a command from a batch it could hash to the wrong bucket due to
+ * non-opcode bits being set. But if we don't include those bits, some 3D
+ * commands may hash to the same bucket due to not including opcode bits that
+ * make the command unique. For now, we will risk hashing to the same bucket.
+ *
+ * If we attempt to generate a perfect hash, we should be able to look at bits
+ * 31:29 of a command from a batch buffer and use the full mask for that
+ * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
+ */
+#define CMD_HASH_MASK STD_MI_OPCODE_MASK
+
+static int init_hash_table(struct intel_ring_buffer *ring,
+  const struct drm_i915_cmd_table *cmd_tables,
+  int cmd_table_count)
+{
+   int i, j;
+
+   hash_init(ring->cmd_hash);
+
+   for (i = 0; i < cmd_table_count; i++) {
+   const struct drm_i915_cmd_table *table = &cmd_tables[i];
+
+   for (j = 0; j < table->count; j++) {
+   const struct drm_i915_cmd_descriptor *desc =
+   &table->table[j];
+   struct cmd_node *desc_node =
+   kmalloc(sizeof(*desc_node), GFP_KERNEL);
+
+   if (!desc_node)
+   return -ENOMEM;
+
+   desc_node->desc = desc;
+   hash_add(ring->cmd_hash, &desc_node->node,
+desc->cmd.value & CMD_HASH_MASK);
+   }
+   }
+
+   return 0;
+}
+
+static void fini_hash_table(struct intel_ring_buffer *ring)
+{
+   struct hlist_node *tmp;
+   struct cmd_node *desc_node;
+   int i;
+
+   hash_for_each_safe(ring->cmd_hash, i, tmp, desc_node, node) {
+   

[Intel-gfx] [PATCH] tests/gen7_forcewake_mt: Don't set the GGTT bit in SRM command

2014-05-10 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The command parser in newer kernels will reject it and setting this
bit is not required for the actual test case.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76670
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---

This is a resend of
http://lists.freedesktop.org/archives/intel-gfx/2014-April/043223.html

There was initially some discussion as to the fact that the test was
written to reflect the implementation of a workaround in the ddx and
whether this patch leads to a deviation between the two. There was no
real closure on that discussion, however, I don't believe the
MI_STORE_REGISTER_MEM aspect of the test is relevant to the ddx code,
so I'd like to move forward with this or get clear direction on the
preferred solution.

 tests/gen7_forcewake_mt.c | 55 +--
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/tests/gen7_forcewake_mt.c b/tests/gen7_forcewake_mt.c
index fdc34ce..3afd80a 100644
--- a/tests/gen7_forcewake_mt.c
+++ b/tests/gen7_forcewake_mt.c
@@ -121,7 +121,7 @@ static void *thread(void *arg)
 }
 
 #define MI_LOAD_REGISTER_IMM (0x22<<23)
-#define MI_STORE_REGISTER_MEM   (0x24<<23| 1<<22)
+#define MI_STORE_REGISTER_MEM   (0x24<<23)
 
 igt_simple_main
 {
@@ -140,8 +140,9 @@ igt_simple_main
sleep(2);
 
for (i = 0; i < 1000; i++) {
+   uint32_t *p;
struct drm_i915_gem_execbuffer2 execbuf;
-   struct drm_i915_gem_exec_object2 exec;
+   struct drm_i915_gem_exec_object2 exec[2];
struct drm_i915_gem_relocation_entry reloc[2];
uint32_t b[] = {
MI_LOAD_REGISTER_IMM | 1,
@@ -149,54 +150,56 @@ igt_simple_main
2 << 16 | 2,
MI_STORE_REGISTER_MEM | 1,
FORCEWAKE_MT,
-   5*sizeof(uint32_t),
+   0, // to be patched
MI_LOAD_REGISTER_IMM | 1,
FORCEWAKE_MT,
2 << 16,
MI_STORE_REGISTER_MEM | 1,
FORCEWAKE_MT,
-   11*sizeof(uint32_t),
+   1 * sizeof(uint32_t), // to be patched
MI_BATCH_BUFFER_END,
0
};
 
-   memset(exec, 0, sizeof(exec));
-   exec.handle = gem_create(t[0].fd, 4096);
-   exec.relocation_count = 2;
-   exec.relocs_ptr = (uintptr_t)reloc;
-   //exec.flags = EXEC_OBJECT_NEEDS_GTT;
-   gem_write(t[0].fd, exec.handle, 0, b, sizeof(b));
+   memset(exec, 0, sizeof(exec));
+   exec[1].handle = gem_create(t[0].fd, 4096);
+   exec[1].relocation_count = 2;
+   exec[1].relocs_ptr = (uintptr_t)reloc;
+   gem_write(t[0].fd, exec[1].handle, 0, b, sizeof(b));
+   exec[0].handle = gem_create(t[0].fd, 4096);
 
reloc[0].offset = 5 * sizeof(uint32_t);
-   reloc[0].delta = 5 * sizeof(uint32_t);
-   reloc[0].target_handle = exec.handle;
-   reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-   reloc[0].write_domain = 0;
+   reloc[0].delta = 0;
+   reloc[0].target_handle = exec[0].handle;
+   reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
reloc[0].presumed_offset = 0;
 
reloc[1].offset = 11 * sizeof(uint32_t);
-   reloc[1].delta = 11 * sizeof(uint32_t);
-   reloc[1].target_handle = exec.handle;
-   reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-   reloc[1].write_domain = 0;
+   reloc[1].delta = 1 * sizeof(uint32_t);
+   reloc[1].target_handle = exec[0].handle;
+   reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc[1].write_domain = I915_GEM_DOMAIN_RENDER;
reloc[1].presumed_offset = 0;
 
memset(execbuf, 0, sizeof(execbuf));
execbuf.buffers_ptr = (uintptr_t)exec;
-   execbuf.buffer_count = 1;
+   execbuf.buffer_count = 2;
execbuf.batch_len = sizeof(b);
execbuf.flags = I915_EXEC_SECURE;
 
gem_execbuf(t[0].fd, execbuf);
-   gem_sync(t[0].fd, exec.handle);
-   gem_read(t[0].fd, exec.handle, 0, b, sizeof(b));
-   gem_close(t[0].fd, exec.handle);
+   gem_sync(t[0].fd, exec[1].handle);
 
-   printf([%d]={ %08x %08x %08x %08x %08x %08x %08x %08x %08x 
%08x %08x %08x %08x }\n,
-  i, b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], 
b[9], b[10], b[11], b[12]);
+   p 

[Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser

2014-04-28 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

For clients that submit large batch buffers the command parser has
a substantial impact on performance. On my HSW ULT system performance
drops as much as ~20% on some tests. Most of the time is spent in the
command lookup code. Converting that from the current naive search to
a hash table lookup reduces the performance impact by as much as ~10%.

The choice of value for I915_CMD_HASH_ORDER allows all commands
currently used in the parser tables to hash to their own bucket (except
for one collision on the render ring). The tradeoff is that it wastes
memory. Because the opcodes for the commands in the tables are not
particularly well distributed, reducing the order still leaves many
buckets empty. The increased collisions don't seem to have a huge
impact on the performance gain, but for now anyhow, the parser trades
memory for performance.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c  | 136 
 drivers/gpu/drm/i915/i915_drv.h |   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |  11 ++-
 4 files changed, 116 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 9bac097..9dca899 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
 }
 
-static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
+static bool validate_cmds_sorted(struct intel_ring_buffer *ring,
+const struct drm_i915_cmd_table *cmd_tables,
+int cmd_table_count)
 {
int i;
bool ret = true;
 
-   if (!ring->cmd_tables || ring->cmd_table_count == 0)
+   if (!cmd_tables || cmd_table_count == 0)
return true;
 
-   for (i = 0; i < ring->cmd_table_count; i++) {
-   const struct drm_i915_cmd_table *table = &ring->cmd_tables[i];
+   for (i = 0; i < cmd_table_count; i++) {
+   const struct drm_i915_cmd_table *table = &cmd_tables[i];
u32 previous = 0;
int j;
 
@@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct intel_ring_buffer 
*ring)
 ring-master_reg_count);
 }
 
+struct cmd_node {
+   const struct drm_i915_cmd_descriptor *desc;
+   struct hlist_node node;
+};
+
+/*
+ * Different command ranges have different numbers of bits for the opcode.
+ * In order to use the opcode bits, and only the opcode bits, for the hash key
+ * we should use the MI_* command opcode mask (since those commands use the
+ * fewest bits for the opcode.)
+ */
+#define CMD_HASH_MASK STD_MI_OPCODE_MASK
+
+static int init_hash_table(struct intel_ring_buffer *ring,
+  const struct drm_i915_cmd_table *cmd_tables,
+  int cmd_table_count)
+{
+   int i, j;
+
+   hash_init(ring->cmd_hash);
+
+   for (i = 0; i < cmd_table_count; i++) {
+   const struct drm_i915_cmd_table *table = &cmd_tables[i];
+
+   for (j = 0; j < table->count; j++) {
+   const struct drm_i915_cmd_descriptor *desc =
+   &table->table[j];
+   struct cmd_node *desc_node =
+   kmalloc(sizeof(*desc_node), GFP_KERNEL);
+
+   if (!desc_node)
+   return -ENOMEM;
+
+   desc_node->desc = desc;
+   hash_add(ring->cmd_hash, &desc_node->node,
+desc->cmd.value & CMD_HASH_MASK);
+   }
+   }
+
+   return 0;
+}
+
+static void fini_hash_table(struct intel_ring_buffer *ring)
+{
+   struct hlist_node *tmp;
+   struct cmd_node *desc_node;
+   int i;
+
+   hash_for_each_safe(ring->cmd_hash, i, tmp, desc_node, node) {
+   hash_del(&desc_node->node);
+   kfree(desc_node);
+   }
+}
+
 /**
  * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer
  * @ring: the ringbuffer to initialize
@@ -567,18 +623,21 @@ static bool validate_regs_sorted(struct intel_ring_buffer 
*ring)
  */
 void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring)
 {
+   const struct drm_i915_cmd_table *cmd_tables;
+   int cmd_table_count;
+
if (!IS_GEN7(ring->dev))
return;
 
switch (ring->id) {
case RCS:
if (IS_HASWELL(ring->dev)) {
-   ring->cmd_tables = hsw_render_ring_cmds;
-   ring->cmd_table_count =
+   cmd_tables = hsw_render_ring_cmds;
+   cmd_table_count =
ARRAY_SIZE(hsw_render_ring_cmds);
} else {
- 

[Intel-gfx] [PATCH] SQUASH: drm/i915: One more register for mesa

2014-04-09 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Originally left out because it wasn't used. But it may be needed
and doesn't pose any risk, so add to the whitelist.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 1 +
 drivers/gpu/drm/i915/i915_reg.h| 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 3486ef7..9bac097 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -408,6 +408,7 @@ static const u32 gen7_render_regs[] = {
REG64(PS_INVOCATION_COUNT),
REG64(PS_DEPTH_COUNT),
OACONTROL, /* Only allowed for LRI and SRM. See below. */
+   GEN7_3DPRIM_END_OFFSET,
GEN7_3DPRIM_START_VERTEX,
GEN7_3DPRIM_VERTEX_COUNT,
GEN7_3DPRIM_INSTANCE_COUNT,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f49569b..46ea233 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -430,6 +430,7 @@
 
 #define GEN7_SO_PRIM_STORAGE_NEEDED(n)  (0x5240 + (n) * 8)
 
+#define GEN7_3DPRIM_END_OFFSET  0x2420
 #define GEN7_3DPRIM_START_VERTEX0x2430
 #define GEN7_3DPRIM_VERTEX_COUNT0x2434
 #define GEN7_3DPRIM_INSTANCE_COUNT  0x2438
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] tests/gen7_forcewake_mt: Don't set the GGTT bit in SRM command

2014-04-08 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The command parser in newer kernels will reject it and setting this
bit is not required for the actual test case.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76670
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gen7_forcewake_mt.c | 55 +--
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/tests/gen7_forcewake_mt.c b/tests/gen7_forcewake_mt.c
index fdc34ce..3afd80a 100644
--- a/tests/gen7_forcewake_mt.c
+++ b/tests/gen7_forcewake_mt.c
@@ -121,7 +121,7 @@ static void *thread(void *arg)
 }
 
 #define MI_LOAD_REGISTER_IMM    (0x22<<23)
-#define MI_STORE_REGISTER_MEM   (0x24<<23| 1<<22)
+#define MI_STORE_REGISTER_MEM   (0x24<<23)
 
 igt_simple_main
 {
@@ -140,8 +140,9 @@ igt_simple_main
sleep(2);
 
for (i = 0; i  1000; i++) {
+   uint32_t *p;
struct drm_i915_gem_execbuffer2 execbuf;
-   struct drm_i915_gem_exec_object2 exec;
+   struct drm_i915_gem_exec_object2 exec[2];
struct drm_i915_gem_relocation_entry reloc[2];
uint32_t b[] = {
MI_LOAD_REGISTER_IMM | 1,
@@ -149,54 +150,56 @@ igt_simple_main
2 << 16 | 2,
MI_STORE_REGISTER_MEM | 1,
FORCEWAKE_MT,
-   5*sizeof(uint32_t),
+   0, // to be patched
MI_LOAD_REGISTER_IMM | 1,
FORCEWAKE_MT,
2 << 16,
MI_STORE_REGISTER_MEM | 1,
FORCEWAKE_MT,
-   11*sizeof(uint32_t),
+   1 * sizeof(uint32_t), // to be patched
MI_BATCH_BUFFER_END,
0
};
 
-   memset(exec, 0, sizeof(exec));
-   exec.handle = gem_create(t[0].fd, 4096);
-   exec.relocation_count = 2;
-   exec.relocs_ptr = (uintptr_t)reloc;
-   //exec.flags = EXEC_OBJECT_NEEDS_GTT;
-   gem_write(t[0].fd, exec.handle, 0, b, sizeof(b));
+   memset(exec, 0, sizeof(exec));
+   exec[1].handle = gem_create(t[0].fd, 4096);
+   exec[1].relocation_count = 2;
+   exec[1].relocs_ptr = (uintptr_t)reloc;
+   gem_write(t[0].fd, exec[1].handle, 0, b, sizeof(b));
+   exec[0].handle = gem_create(t[0].fd, 4096);
 
reloc[0].offset = 5 * sizeof(uint32_t);
-   reloc[0].delta = 5 * sizeof(uint32_t);
-   reloc[0].target_handle = exec.handle;
-   reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-   reloc[0].write_domain = 0;
+   reloc[0].delta = 0;
+   reloc[0].target_handle = exec[0].handle;
+   reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
reloc[0].presumed_offset = 0;
 
reloc[1].offset = 11 * sizeof(uint32_t);
-   reloc[1].delta = 11 * sizeof(uint32_t);
-   reloc[1].target_handle = exec.handle;
-   reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-   reloc[1].write_domain = 0;
+   reloc[1].delta = 1 * sizeof(uint32_t);
+   reloc[1].target_handle = exec[0].handle;
+   reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc[1].write_domain = I915_GEM_DOMAIN_RENDER;
reloc[1].presumed_offset = 0;
 
memset(execbuf, 0, sizeof(execbuf));
execbuf.buffers_ptr = (uintptr_t)exec;
-   execbuf.buffer_count = 1;
+   execbuf.buffer_count = 2;
execbuf.batch_len = sizeof(b);
execbuf.flags = I915_EXEC_SECURE;
 
gem_execbuf(t[0].fd, execbuf);
-   gem_sync(t[0].fd, exec.handle);
-   gem_read(t[0].fd, exec.handle, 0, b, sizeof(b));
-   gem_close(t[0].fd, exec.handle);
+   gem_sync(t[0].fd, exec[1].handle);
 
-   printf([%d]={ %08x %08x %08x %08x %08x %08x %08x %08x %08x 
%08x %08x %08x %08x }\n,
-  i, b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], 
b[9], b[10], b[11], b[12]);
+   p = gem_mmap(t[0].fd, exec[0].handle, 4096, PROT_READ);
 
-   igt_assert(b[5] & 2);
-   igt_assert((b[11] & 2) == 0);
+   printf("[%d]={ %08x %08x }\n", i, p[0], p[1]);
+   igt_assert(p[0] & 2);
+   igt_assert((p[1] & 2) == 0);
+
+   munmap(p, 4096);
+   gem_close(t[0].fd, exec[0].handle);
+   gem_close(t[0].fd, exec[1].handle);
 
usleep(1000);
}
-- 
1.8.3.2

___

[Intel-gfx] [PATCH] drm/i915: Add more registers to the whitelist for mesa

2014-04-08 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

These are additional registers needed for performance monitoring and
ARB_draw_indirect extensions in mesa.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76719
Cc: Kenneth Graunke kenn...@whitecape.org
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 9 +
 drivers/gpu/drm/i915/i915_reg.h| 8 
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 29184d6..3486ef7 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -408,10 +408,19 @@ static const u32 gen7_render_regs[] = {
REG64(PS_INVOCATION_COUNT),
REG64(PS_DEPTH_COUNT),
OACONTROL, /* Only allowed for LRI and SRM. See below. */
+   GEN7_3DPRIM_START_VERTEX,
+   GEN7_3DPRIM_VERTEX_COUNT,
+   GEN7_3DPRIM_INSTANCE_COUNT,
+   GEN7_3DPRIM_START_INSTANCE,
+   GEN7_3DPRIM_BASE_VERTEX,
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)),
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)),
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)),
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(3)),
+   REG64(GEN7_SO_PRIM_STORAGE_NEEDED(0)),
+   REG64(GEN7_SO_PRIM_STORAGE_NEEDED(1)),
+   REG64(GEN7_SO_PRIM_STORAGE_NEEDED(2)),
+   REG64(GEN7_SO_PRIM_STORAGE_NEEDED(3)),
GEN7_SO_WRITE_OFFSET(0),
GEN7_SO_WRITE_OFFSET(1),
GEN7_SO_WRITE_OFFSET(2),
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8e60737..533ec0a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -427,6 +427,14 @@
 /* There are the 4 64-bit counter registers, one for each stream output */
 #define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
 
+#define GEN7_SO_PRIM_STORAGE_NEEDED(n)  (0x5240 + (n) * 8)
+
+#define GEN7_3DPRIM_START_VERTEX0x2430
+#define GEN7_3DPRIM_VERTEX_COUNT0x2434
+#define GEN7_3DPRIM_INSTANCE_COUNT  0x2438
+#define GEN7_3DPRIM_START_INSTANCE  0x243C
+#define GEN7_3DPRIM_BASE_VERTEX 0x2440
+
 #define OACONTROL 0x2360
 
 #define _GEN7_PIPEA_DE_LOAD_SL 0x70068
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 3/3] drm/i915: Track OACONTROL register enable/disable during parsing

2014-03-28 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

There is some thought that the data from the performance counters enabled
via OACONTROL should only be available to the process that enabled counting.
To limit snooping, require that any batch buffer which sets OACONTROL to a
non-zero value also sets it back to 0 before the end of the batch.

This requires limiting OACONTROL writes to happen via MI_LOAD_REGISTER_IMM
so that we can access the value being written. This should be in line with
the expected use case for writing OACONTROL.

v2: Drop an unnecessary '? true : false'

Cc: Kenneth Graunke kenn...@whitecape.org
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 35 ++
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 2eb2aca..34e2d45 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -407,12 +407,7 @@ static const u32 gen7_render_regs[] = {
REG64(CL_PRIMITIVES_COUNT),
REG64(PS_INVOCATION_COUNT),
REG64(PS_DEPTH_COUNT),
-   /*
-* FIXME: This is just to keep mesa working for now, we need to check
-* that mesa resets this again and that it doesn't use any of the
-* special modes which write into the gtt.
-*/
-   OACONTROL,
+   OACONTROL, /* Only allowed for LRI and SRM. See below. */
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)),
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)),
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)),
@@ -761,7 +756,8 @@ bool i915_needs_cmd_parser(struct intel_ring_buffer *ring)
 static bool check_cmd(const struct intel_ring_buffer *ring,
  const struct drm_i915_cmd_descriptor *desc,
  const u32 *cmd,
- const bool is_master)
+ const bool is_master,
+ bool *oacontrol_set)
 {
if (desc-flags  CMD_DESC_REJECT) {
DRM_DEBUG_DRIVER(CMD: Rejected command: 0x%08X\n, *cmd);
@@ -777,6 +773,23 @@ static bool check_cmd(const struct intel_ring_buffer *ring,
if (desc-flags  CMD_DESC_REGISTER) {
u32 reg_addr = cmd[desc-reg.offset]  desc-reg.mask;
 
+   /*
+* OACONTROL requires some special handling for writes. We
+* want to make sure that any batch which enables OA also
+* disables it before the end of the batch. The goal is to
+* prevent one process from snooping on the perf data from
+* another process. To do that, we need to check the value
+* that will be written to the register. Hence, limit
+* OACONTROL writes to only MI_LOAD_REGISTER_IMM commands.
+*/
+   if (reg_addr == OACONTROL) {
+   if (desc-cmd.value == MI_LOAD_REGISTER_MEM)
+   return false;
+
+   if (desc-cmd.value == MI_LOAD_REGISTER_IMM(1))
+   *oacontrol_set = (cmd[2] != 0);
+   }
+
if (!valid_reg(ring-reg_table,
   ring-reg_count, reg_addr)) {
if (!is_master ||
@@ -851,6 +864,7 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
int needs_clflush = 0;
+   bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
ret = i915_gem_obj_prepare_shmem_read(batch_obj, needs_clflush);
if (ret) {
@@ -900,7 +914,7 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
break;
}
 
-   if (!check_cmd(ring, desc, cmd, is_master)) {
+   if (!check_cmd(ring, desc, cmd, is_master, &oacontrol_set)) {
ret = -EINVAL;
break;
}
@@ -908,6 +922,11 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
cmd += length;
}
 
+   if (oacontrol_set) {
+   DRM_DEBUG_DRIVER(CMD: batch set OACONTROL but did not clear 
it\n);
+   ret = -EINVAL;
+   }
+
if (cmd >= batch_end) {
DRM_DEBUG_DRIVER(CMD: Got to the end of the buffer w/o a BBE 
cmd!\n);
ret = -EINVAL;
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/3] drm/i915: Refactor cmd parser checks into a function

2014-03-27 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This brings the code a little more in line with kernel coding style.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 136 +
 1 file changed, 71 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 8a93db3..2eb2aca 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -758,6 +758,76 @@ bool i915_needs_cmd_parser(struct intel_ring_buffer *ring)
return (i915.enable_cmd_parser == 1);
 }
 
+static bool check_cmd(const struct intel_ring_buffer *ring,
+ const struct drm_i915_cmd_descriptor *desc,
+ const u32 *cmd,
+ const bool is_master)
+{
+   if (desc-flags  CMD_DESC_REJECT) {
+   DRM_DEBUG_DRIVER(CMD: Rejected command: 0x%08X\n, *cmd);
+   return false;
+   }
+
+   if ((desc-flags  CMD_DESC_MASTER)  !is_master) {
+   DRM_DEBUG_DRIVER(CMD: Rejected master-only command: 0x%08X\n,
+*cmd);
+   return false;
+   }
+
+   if (desc-flags  CMD_DESC_REGISTER) {
+   u32 reg_addr = cmd[desc-reg.offset]  desc-reg.mask;
+
+   if (!valid_reg(ring-reg_table,
+  ring-reg_count, reg_addr)) {
+   if (!is_master ||
+   !valid_reg(ring-master_reg_table,
+  ring-master_reg_count,
+  reg_addr)) {
+   DRM_DEBUG_DRIVER(CMD: Rejected register 0x%08X 
in command: 0x%08X (ring=%d)\n,
+reg_addr,
+*cmd,
+ring-id);
+   return false;
+   }
+   }
+   }
+
+   if (desc-flags  CMD_DESC_BITMASK) {
+   int i;
+
+   for (i = 0; i  MAX_CMD_DESC_BITMASKS; i++) {
+   u32 dword;
+
+   if (desc-bits[i].mask == 0)
+   break;
+
+   if (desc-bits[i].condition_mask != 0) {
+   u32 offset =
+   desc-bits[i].condition_offset;
+   u32 condition = cmd[offset] 
+   desc-bits[i].condition_mask;
+
+   if (condition == 0)
+   continue;
+   }
+
+   dword = cmd[desc-bits[i].offset] 
+   desc-bits[i].mask;
+
+   if (dword != desc-bits[i].expected) {
+   DRM_DEBUG_DRIVER(CMD: Rejected command 0x%08X 
for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n,
+*cmd,
+desc-bits[i].mask,
+desc-bits[i].expected,
+dword, ring-id);
+   return false;
+   }
+   }
+   }
+
+   return true;
+}
+
 #define LENGTH_BIAS 2
 
 /**
@@ -830,75 +900,11 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
break;
}
 
-   if (desc-flags  CMD_DESC_REJECT) {
-   DRM_DEBUG_DRIVER(CMD: Rejected command: 0x%08X\n, 
*cmd);
+   if (!check_cmd(ring, desc, cmd, is_master)) {
ret = -EINVAL;
break;
}
 
-   if ((desc-flags  CMD_DESC_MASTER)  !is_master) {
-   DRM_DEBUG_DRIVER(CMD: Rejected master-only command: 
0x%08X\n,
-*cmd);
-   ret = -EINVAL;
-   break;
-   }
-
-   if (desc-flags  CMD_DESC_REGISTER) {
-   u32 reg_addr = cmd[desc-reg.offset]  desc-reg.mask;
-
-   if (!valid_reg(ring-reg_table,
-  ring-reg_count, reg_addr)) {
-   if (!is_master ||
-   !valid_reg(ring-master_reg_table,
-  ring-master_reg_count,
-  reg_addr)) {
-   DRM_DEBUG_DRIVER(CMD: Rejected 
register 0x%08X in command: 0x%08X (ring=%d)\n,
-reg_addr,
-*cmd,
-  

[Intel-gfx] [PATCH 3/3] drm/i915: Track OACONTROL register enable/disable during parsing

2014-03-27 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

There is some thought that the data from the performance counters enabled
via OACONTROL should only be available to the process that enabled counting.
To limit snooping, require that any batch buffer which sets OACONTROL to a
non-zero value also sets it back to 0 before the end of the batch.

This requires limiting OACONTROL writes to happen via MI_LOAD_REGISTER_IMM
so that we can access the value being written. This should be in line with
the expected use case for writing OACONTROL.

Cc: Kenneth Graunke kenn...@whitecape.org
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 35 ++
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 2eb2aca..779e14c 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -407,12 +407,7 @@ static const u32 gen7_render_regs[] = {
REG64(CL_PRIMITIVES_COUNT),
REG64(PS_INVOCATION_COUNT),
REG64(PS_DEPTH_COUNT),
-   /*
-* FIXME: This is just to keep mesa working for now, we need to check
-* that mesa resets this again and that it doesn't use any of the
-* special modes which write into the gtt.
-*/
-   OACONTROL,
+   OACONTROL, /* Only allowed for LRI and SRM. See below. */
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)),
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)),
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)),
@@ -761,7 +756,8 @@ bool i915_needs_cmd_parser(struct intel_ring_buffer *ring)
 static bool check_cmd(const struct intel_ring_buffer *ring,
  const struct drm_i915_cmd_descriptor *desc,
  const u32 *cmd,
- const bool is_master)
+ const bool is_master,
+ bool *oacontrol_set)
 {
if (desc-flags  CMD_DESC_REJECT) {
DRM_DEBUG_DRIVER(CMD: Rejected command: 0x%08X\n, *cmd);
@@ -777,6 +773,23 @@ static bool check_cmd(const struct intel_ring_buffer *ring,
if (desc-flags  CMD_DESC_REGISTER) {
u32 reg_addr = cmd[desc-reg.offset]  desc-reg.mask;
 
+   /*
+* OACONTROL requires some special handling for writes. We
+* want to make sure that any batch which enables OA also
+* disables it before the end of the batch. The goal is to
+* prevent one process from snooping on the perf data from
+* another process. To do that, we need to check the value
+* that will be written to the register. Hence, limit
+* OACONTROL writes to only MI_LOAD_REGISTER_IMM commands.
+*/
+   if (reg_addr == OACONTROL) {
+   if (desc-cmd.value == MI_LOAD_REGISTER_MEM)
+   return false;
+
+   if (desc-cmd.value == MI_LOAD_REGISTER_IMM(1))
+   *oacontrol_set = (cmd[2] != 0) ? true : false;
+   }
+
if (!valid_reg(ring-reg_table,
   ring-reg_count, reg_addr)) {
if (!is_master ||
@@ -851,6 +864,7 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
int needs_clflush = 0;
+   bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
ret = i915_gem_obj_prepare_shmem_read(batch_obj, needs_clflush);
if (ret) {
@@ -900,7 +914,7 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
break;
}
 
-   if (!check_cmd(ring, desc, cmd, is_master)) {
+   if (!check_cmd(ring, desc, cmd, is_master, &oacontrol_set)) {
ret = -EINVAL;
break;
}
@@ -908,6 +922,11 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
cmd += length;
}
 
+   if (oacontrol_set) {
+   DRM_DEBUG_DRIVER(CMD: batch set OACONTROL but did not clear 
it\n);
+   ret = -EINVAL;
+   }
+
if (cmd >= batch_end) {
DRM_DEBUG_DRIVER(CMD: Got to the end of the buffer w/o a BBE 
cmd!\n);
ret = -EINVAL;
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 0/3] Fix up cmd parser OACONTROL handling + refactorings

2014-03-27 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Patches 1 and 2 do some cleanups suggested as part of the review process.
Patch 3 continues the OACONTROL handling fixes from the other day.

I think patches 1 and 2 are valuable on their own. I think the need/benefit
for the tracking provided by patch 3 is somewhat unclear. Per Ken:

"I don't really buy the snooping problem, though...just because I leave
OACONTROL set doesn't mean I'll get useful data.  Another context might
clobber it, and empirically the numbers seem to reset across RC6 anyway.
So in actuality, they're likely to get bogus data.

Even if they did somehow miraculously get decent values, it basically
gives information akin to 'top', which is unprivileged on every system
I've ever used."

That argument makes sense to me, but I've gone ahead and written the patch in
the event that we do want it.

Brad Volkin (3):
  drm/i915: BUG_ON() when cmd/reg tables are not sorted
  drm/i915: Refactor cmd parser checks into a function
  drm/i915: Track OACONTROL register enable/disable during parsing

 drivers/gpu/drm/i915/i915_cmd_parser.c | 198 +++--
 1 file changed, 117 insertions(+), 81 deletions(-)

-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/3] drm/i915: BUG_ON() when cmd/reg tables are not sorted

2014-03-27 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

As suggested during review, this makes it much more obvious
when the tables are not sorted.

Cc: Jani Nikula jani.nik...@linux.intel.com
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 31 +--
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 788bd96..8a93db3 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -493,12 +493,13 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
 }
 
-static void validate_cmds_sorted(struct intel_ring_buffer *ring)
+static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
 {
int i;
+   bool ret = true;
 
if (!ring-cmd_tables || ring-cmd_table_count == 0)
-   return;
+   return true;
 
for (i = 0; i  ring-cmd_table_count; i++) {
const struct drm_i915_cmd_table *table = ring-cmd_tables[i];
@@ -510,35 +511,45 @@ static void validate_cmds_sorted(struct intel_ring_buffer 
*ring)
table-table[i];
u32 curr = desc-cmd.value  desc-cmd.mask;
 
-   if (curr  previous)
+   if (curr  previous) {
DRM_ERROR(CMD: table not sorted ring=%d 
table=%d entry=%d cmd=0x%08X prev=0x%08X\n,
  ring-id, i, j, curr, previous);
+   ret = false;
+   }
 
previous = curr;
}
}
+
+   return ret;
 }
 
-static void check_sorted(int ring_id, const u32 *reg_table, int reg_count)
+static bool check_sorted(int ring_id, const u32 *reg_table, int reg_count)
 {
int i;
u32 previous = 0;
+   bool ret = true;
 
for (i = 0; i  reg_count; i++) {
u32 curr = reg_table[i];
 
-   if (curr  previous)
+   if (curr  previous) {
DRM_ERROR(CMD: table not sorted ring=%d entry=%d 
reg=0x%08X prev=0x%08X\n,
  ring_id, i, curr, previous);
+   ret = false;
+   }
 
previous = curr;
}
+
+   return ret;
 }
 
-static void validate_regs_sorted(struct intel_ring_buffer *ring)
+static bool validate_regs_sorted(struct intel_ring_buffer *ring)
 {
-   check_sorted(ring-id, ring-reg_table, ring-reg_count);
-   check_sorted(ring-id, ring-master_reg_table, ring-master_reg_count);
+   return check_sorted(ring->id, ring->reg_table, ring->reg_count) &&
+   check_sorted(ring->id, ring->master_reg_table,
+ring->master_reg_count);
 }
 
 /**
@@ -617,8 +628,8 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring)
BUG();
}
 
-   validate_cmds_sorted(ring);
-   validate_regs_sorted(ring);
+   BUG_ON(!validate_cmds_sorted(ring));
+   BUG_ON(!validate_regs_sorted(ring));
 }
 
 static const struct drm_i915_cmd_descriptor*
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] tests/gem_exec_parse: Test for OACONTROL tracking

2014-03-27 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 48 
 1 file changed, 48 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index 34d097d..853eb57 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -204,6 +204,8 @@ int fd;
 #define   PIPE_CONTROL_QW_WRITE    (1<<14)
 #define   PIPE_CONTROL_LRI_POST_OP (1<<23)
 
+#define OACONTROL 0x2360
+
 igt_main
 {
igt_fixture {
@@ -337,6 +339,52 @@ igt_main
0));
}
 
+   igt_subtest("oacontrol-tracking") {
+   uint32_t lri_ok[] = {
+   MI_LOAD_REGISTER_IMM,
+   OACONTROL,
+   0x31337000,
+   MI_LOAD_REGISTER_IMM,
+   OACONTROL,
+   0x0,
+   MI_BATCH_BUFFER_END,
+   0
+   };
+   uint32_t lri_bad[] = {
+   MI_LOAD_REGISTER_IMM,
+   OACONTROL,
+   0x31337000,
+   MI_BATCH_BUFFER_END,
+   };
+   uint32_t lri_extra_bad[] = {
+   MI_LOAD_REGISTER_IMM,
+   OACONTROL,
+   0x31337000,
+   MI_LOAD_REGISTER_IMM,
+   OACONTROL,
+   0x0,
+   MI_LOAD_REGISTER_IMM,
+   OACONTROL,
+   0x31337000,
+   MI_BATCH_BUFFER_END,
+   };
+   igt_assert(
+   exec_batch(fd, handle,
+ lri_ok, sizeof(lri_ok),
+ I915_EXEC_RENDER,
+ 0));
+   igt_assert(
+   exec_batch(fd, handle,
+ lri_bad, sizeof(lri_bad),
+ I915_EXEC_RENDER,
+ -EINVAL));
+   igt_assert(
+   exec_batch(fd, handle,
+ lri_extra_bad, sizeof(lri_extra_bad),
+ I915_EXEC_RENDER,
+ -EINVAL));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 07/13] drm/i915: Add register whitelist for DRM master

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

These are used to implement scanline waits in the X server.

v2: Use #defines instead of magic numbers

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 29 +
 drivers/gpu/drm/i915/i915_reg.h|  6 ++
 2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 4347a30..353e5cf 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -281,6 +281,19 @@ static const u32 gen7_blt_regs[] = {
BCS_SWCTRL,
 };
 
+static const u32 ivb_master_regs[] = {
+   FORCEWAKE_MT,
+   DERRMR,
+   GEN7_PIPE_DE_LOAD_SL(PIPE_A),
+   GEN7_PIPE_DE_LOAD_SL(PIPE_B),
+   GEN7_PIPE_DE_LOAD_SL(PIPE_C),
+};
+
+static const u32 hsw_master_regs[] = {
+   FORCEWAKE_MT,
+   DERRMR,
+};
+
 #undef REG64
 
 static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
@@ -409,6 +422,14 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring)
ring-reg_table = gen7_render_regs;
ring-reg_count = ARRAY_SIZE(gen7_render_regs);
 
+   if (IS_HASWELL(ring-dev)) {
+   ring-master_reg_table = hsw_master_regs;
+   ring-master_reg_count = ARRAY_SIZE(hsw_master_regs);
+   } else {
+   ring-master_reg_table = ivb_master_regs;
+   ring-master_reg_count = ARRAY_SIZE(ivb_master_regs);
+   }
+
ring-get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VCS:
@@ -428,6 +449,14 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring)
ring-reg_table = gen7_blt_regs;
ring-reg_count = ARRAY_SIZE(gen7_blt_regs);
 
+   if (IS_HASWELL(ring-dev)) {
+   ring-master_reg_table = hsw_master_regs;
+   ring-master_reg_count = ARRAY_SIZE(hsw_master_regs);
+   } else {
+   ring-master_reg_table = ivb_master_regs;
+   ring-master_reg_count = ARRAY_SIZE(ivb_master_regs);
+   }
+
ring-get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VECS:
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 1f2aeba..87523df 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -415,6 +415,12 @@
 /* There are the 4 64-bit counter registers, one for each stream output */
 #define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
 
+#define _GEN7_PIPEA_DE_LOAD_SL 0x70068
+#define _GEN7_PIPEB_DE_LOAD_SL 0x71068
+#define GEN7_PIPE_DE_LOAD_SL(pipe) _PIPE(pipe, \
+_GEN7_PIPEA_DE_LOAD_SL, \
+_GEN7_PIPEB_DE_LOAD_SL)
+
 /*
  * Reset registers
  */
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 09/13] drm/i915: Reject commands that explicitly generate interrupts

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The driver leaves most interrupts masked during normal operation,
so there would have to be additional work to enable userspace to
safely request/receive an interrupt.

v2: trailing commas, rebased

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 22 --
 drivers/gpu/drm/i915/i915_reg.h|  1 +
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 4f14a24..0351df1 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -115,7 +115,7 @@
  -- */
 static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD(  MI_NOOP,  SMI,F,  1,  S  ),
-   CMD(  MI_USER_INTERRUPT,SMI,F,  1,  S  ),
+   CMD(  MI_USER_INTERRUPT,SMI,F,  1,  R  ),
CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  M  ),
CMD(  MI_ARB_CHECK, SMI,F,  1,  S  ),
CMD(  MI_REPORT_HEAD,   SMI,F,  1,  S  ),
@@ -156,7 +156,7 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = 
{
CMD(  GFX_OP_PIPE_CONTROL(5),   S3D,   !F,  0xFF,   B,
  .bits = {{
.offset = 1,
-   .mask = PIPE_CONTROL_MMIO_WRITE,
+   .mask = (PIPE_CONTROL_MMIO_WRITE | PIPE_CONTROL_NOTIFY),
.expected = 0,
  }},  ),
 };
@@ -186,6 +186,12 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = 
{
CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0xFF,   S  ),
CMD(  MI_UPDATE_GTT,SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_FLUSH_DW,  SMI,   !F,  0x3F,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_FLUSH_DW_NOTIFY,
+   .expected = 0,
+ }},  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
/*
 * MFX_WAIT doesn't fit the way we handle length for most commands.
@@ -199,6 +205,12 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0xFF,   S  ),
CMD(  MI_UPDATE_GTT,SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_FLUSH_DW,  SMI,   !F,  0x3F,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_FLUSH_DW_NOTIFY,
+   .expected = 0,
+ }},  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
 };
 
@@ -206,6 +218,12 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3FF,  S  ),
CMD(  MI_UPDATE_GTT,SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_FLUSH_DW,  SMI,   !F,  0x3F,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_FLUSH_DW_NOTIFY,
+   .expected = 0,
+ }},  ),
CMD(  COLOR_BLT,S2D,   !F,  0x3F,   S  ),
CMD(  SRC_COPY_BLT, S2D,   !F,  0x3F,   S  ),
 };
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 11cca96..e6dd7e9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -269,6 +269,7 @@
 #define   MI_FLUSH_DW_STORE_INDEX  (1<<21)
 #define   MI_INVALIDATE_TLB        (1<<18)
 #define   MI_FLUSH_DW_OP_STOREDW   (1<<14)
+#define   MI_FLUSH_DW_NOTIFY       (1<<8)
 #define   MI_INVALIDATE_BSD        (1<<7)
 #define   MI_FLUSH_DW_USE_GTT      (1<<2)
 #define   MI_FLUSH_DW_USE_PPGTT    (0<<2)
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 06/13] drm/i915: Add register whitelists for mesa

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

These registers are currently used by mesa for blitting,
transform feedback extensions, and performance monitoring
extensions.

v2: REG64 macro

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 45 ++
 drivers/gpu/drm/i915/i915_reg.h| 20 +++
 2 files changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index cf03ba6..4347a30 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -244,6 +244,45 @@ static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] 
= {
{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
 };
 
+/*
+ * Register whitelists, sorted by increasing register offset.
+ *
+ * Some registers that userspace accesses are 64 bits. The register
+ * access commands only allow 32-bit accesses. Hence, we have to include
+ * entries for both halves of the 64-bit registers.
+ */
+
+/* Convenience macro for adding 64-bit registers */
+#define REG64(addr) (addr), (addr + sizeof(u32))
+
+static const u32 gen7_render_regs[] = {
+   REG64(HS_INVOCATION_COUNT),
+   REG64(DS_INVOCATION_COUNT),
+   REG64(IA_VERTICES_COUNT),
+   REG64(IA_PRIMITIVES_COUNT),
+   REG64(VS_INVOCATION_COUNT),
+   REG64(GS_INVOCATION_COUNT),
+   REG64(GS_PRIMITIVES_COUNT),
+   REG64(CL_INVOCATION_COUNT),
+   REG64(CL_PRIMITIVES_COUNT),
+   REG64(PS_INVOCATION_COUNT),
+   REG64(PS_DEPTH_COUNT),
+   REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)),
+   REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)),
+   REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)),
+   REG64(GEN7_SO_NUM_PRIMS_WRITTEN(3)),
+   GEN7_SO_WRITE_OFFSET(0),
+   GEN7_SO_WRITE_OFFSET(1),
+   GEN7_SO_WRITE_OFFSET(2),
+   GEN7_SO_WRITE_OFFSET(3),
+};
+
+static const u32 gen7_blt_regs[] = {
+   BCS_SWCTRL,
+};
+
+#undef REG64
+
 static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
 {
u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT;
@@ -367,6 +406,9 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring)
ring-cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
}
 
+   ring-reg_table = gen7_render_regs;
+   ring-reg_count = ARRAY_SIZE(gen7_render_regs);
+
ring-get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VCS:
@@ -383,6 +425,9 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring)
ring-cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
}
 
+   ring-reg_table = gen7_blt_regs;
+   ring-reg_count = ARRAY_SIZE(gen7_blt_regs);
+
ring-get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VECS:
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 23be06a..1f2aeba 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -396,6 +396,26 @@
 #define SRC_COPY_BLT  ((0x229)|(0x4322))
 
 /*
+ * Registers used only by the command parser
+ */
+#define BCS_SWCTRL 0x22200
+
+#define HS_INVOCATION_COUNT 0x2300
+#define DS_INVOCATION_COUNT 0x2308
+#define IA_VERTICES_COUNT   0x2310
+#define IA_PRIMITIVES_COUNT 0x2318
+#define VS_INVOCATION_COUNT 0x2320
+#define GS_INVOCATION_COUNT 0x2328
+#define GS_PRIMITIVES_COUNT 0x2330
+#define CL_INVOCATION_COUNT 0x2338
+#define CL_PRIMITIVES_COUNT 0x2340
+#define PS_INVOCATION_COUNT 0x2348
+#define PS_DEPTH_COUNT  0x2350
+
+/* These are the 4 64-bit counter registers, one for each stream output */
+#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
+
+/*
  * Reset registers
  */
 #define DEBUG_RESET_I830   0x6070
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 00/13] Gen7 batch buffer command parser

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Certain OpenGL features (e.g. transform feedback, performance monitoring)
require userspace code to submit batches containing commands such as
MI_LOAD_REGISTER_IMM to access various registers. Unfortunately, some
generations of the hardware will noop these commands in unsecure batches
(which includes all userspace batches submitted via i915) even though the
commands may be safe and represent the intended programming model of the device.

This series introduces a software command parser similar in operation to the
command parsing done in hardware for unsecure batches. However, the software
parser allows some operations that would be noop'd by hardware, if the parser
determines the operation is safe, and submits the batch as secure to prevent
hardware parsing. Currently the series implements this on IVB and HSW.

The series has one piece of prep work, one patch for the parser logic, and a
handful of patches to fill out the tables which drive the parser. There are
follow-up patches to libdrm and to i-g-t. The i-g-t tests are basic and do not
test all of the commands used by the parser on the assumption that I'm likely
to make the same mistakes in both the parser and the test.

I've previously run the i-g-t gem_* tests, the piglit quick tests, and generally
used Ubuntu 13.10 IVB and HSW systems with the parser running. Aside from a
failure described below, I did not see any regressions.

At this point there are a couple of required/potential improvements.

1) Chained batches. The parser currently allows MI_BATCH_BUFFER_START commands
   in userspace batches without parsing them. The media driver uses chained
   batches, so a solution is required. I'm still working through the
   requirements but don't want to continue delaying the review process for what
   I have so far.
2) Command buffer copy. To avoid CPU modifications to buffers after parsing, and
   to avoid GPU modifications to buffers via EUs or commands in the batch, we
   should copy the userspace batch buffer to memory that userspace does not
   have access to, map it into GGTT, and execute that batch buffer. I have a
   sense of how to do this for 1st-level batches, but it may need changes to
   tie in with the chained batch parsing, so I've again held off.
3) Coherency. I've previously found a coherency issue on VLV when reading the
   batch buffer from the CPU during execbuffer2. Userspace writes the batch via
   pwrite fast path before calling execbuffer2. The parser reads stale data.
   This works fine on IVB and HSW, so I believe it's an LLC vs. non-LLC issue.
   It's possible that the shmem pread refactoring fixes this, I just have not
   been able to retest due to lack of a VLV system.

v2:
- Significantly reorder series
- Scan secure batches (i.e. I915_EXEC_SECURE)
- Check that parser tables are sorted during init
- Fixed gem_cpu_reloc regression
- HAS_CMD_PARSER -> CMD_PARSER_VERSION getparam
- Additional tests

v3:
- Don't actually send batches as secure yet
- Improved documentation and commenting
- Many other small cleanups throughout

Brad Volkin (13):
  drm/i915: Refactor shmem pread setup
  drm/i915: Implement command buffer parsing logic
  drm/i915: Initial command parser table definitions
  drm/i915: Reject privileged commands
  drm/i915: Allow some privileged commands from master
  drm/i915: Add register whitelists for mesa
  drm/i915: Add register whitelist for DRM master
  drm/i915: Enable register whitelist checks
  drm/i915: Reject commands that explicitly generate interrupts
  drm/i915: Enable PPGTT command parser checks
  drm/i915: Reject commands that would store to global HWS page
  drm/i915: Add a CMD_PARSER_VERSION getparam
  drm/i915: Enable command parsing by default

 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_cmd_parser.c | 918 +
 drivers/gpu/drm/i915/i915_dma.c|   3 +
 drivers/gpu/drm/i915/i915_drv.h| 103 
 drivers/gpu/drm/i915/i915_gem.c|  51 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  18 +
 drivers/gpu/drm/i915/i915_params.c |   5 +
 drivers/gpu/drm/i915/i915_reg.h|  96 +++
 drivers/gpu/drm/i915/intel_ringbuffer.c|   2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h|  32 +
 include/uapi/drm/i915_drm.h|   1 +
 11 files changed, 1216 insertions(+), 14 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_cmd_parser.c

-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 13/13] drm/i915: Enable command parsing by default

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

v2: rebased

OTC-Tracker: AXIA-4631
Change-Id: I6747457e1fe7494bd42787af51198fcba398ad78
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_params.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index aba0b9b..9e394bc 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -48,7 +48,7 @@ struct i915_params i915 __read_mostly = {
.reset = true,
.invert_brightness = 0,
.disable_display = 0,
-   .enable_cmd_parser = 0,
+   .enable_cmd_parser = 1,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -161,4 +161,4 @@ MODULE_PARM_DESC(disable_display, Disable display 
(default: false));
 
 module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
-   Enable command parsing (1=enabled, 0=disabled [default]));
+   Enable command parsing (1=enabled [default], 0=disabled));
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 3/6] tests/gem_exec_parse: Add tests for register whitelist

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index ebf7116..48fde25 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -141,6 +141,7 @@ int fd;
 
 #define MI_ARB_ON_OFF (0x8  23)
 #define MI_DISPLAY_FLIP ((0x14  23) | 1)
+#define MI_LOAD_REGISTER_IMM ((0x22  23) | 1)
 
 #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2)
 #define   PIPE_CONTROL_QW_WRITE(114)
@@ -213,6 +214,31 @@ igt_main
  -EINVAL));
}
 
+   igt_subtest(registers) {
+   uint32_t lri_bad[] = {
+   MI_LOAD_REGISTER_IMM,
+   0, // disallowed register address
+   0x1200,
+   MI_BATCH_BUFFER_END,
+   };
+   uint32_t lri_ok[] = {
+   MI_LOAD_REGISTER_IMM,
+   0x5280, // allowed register address (SO_WRITE_OFFSET[0])
+   0x1,
+   MI_BATCH_BUFFER_END,
+   };
+   igt_assert(
+  exec_batch(fd, handle,
+ lri_bad, sizeof(lri_bad),
+ I915_EXEC_RENDER,
+ -EINVAL));
+   igt_assert(
+  exec_batch(fd, handle,
+ lri_ok, sizeof(lri_ok),
+ I915_EXEC_RENDER,
+ 0));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 05/13] drm/i915: Allow some privileged commands from master

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The Intel DDX uses these to implement scanline waits in the X server.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 90bbb6d..cf03ba6 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -116,7 +116,7 @@
 static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD(  MI_NOOP,  SMI,F,  1,  S  ),
CMD(  MI_USER_INTERRUPT,SMI,F,  1,  S  ),
-   CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  R  ),
+   CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  M  ),
CMD(  MI_ARB_CHECK, SMI,F,  1,  S  ),
CMD(  MI_REPORT_HEAD,   SMI,F,  1,  S  ),
CMD(  MI_SUSPEND_FLUSH, SMI,F,  1,  S  ),
@@ -151,7 +151,7 @@ static const struct drm_i915_cmd_descriptor 
hsw_render_cmds[] = {
CMD(  MI_RS_CONTROL,SMI,F,  1,  S  ),
CMD(  MI_URB_ATOMIC_ALLOC,  SMI,F,  1,  S  ),
CMD(  MI_RS_CONTEXT,SMI,F,  1,  S  ),
-   CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   M  ),
CMD(  MI_LOAD_SCAN_LINES_EXCL,  SMI,   !F,  0x3F,   R  ),
CMD(  MI_LOAD_REGISTER_REG, SMI,   !F,  0xFF,   R  ),
CMD(  MI_RS_STORE_DATA_IMM, SMI,   !F,  0xFF,   S  ),
@@ -196,7 +196,7 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
 };
 
 static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
-   CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   M  ),
CMD(  MI_LOAD_SCAN_LINES_EXCL,  SMI,   !F,  0x3F,   R  ),
 };
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 6/6] tests/gem_exec_parse: Test a command crossing a page boundary

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This is a speculative test in that it's not particularly relevant
today, but is important if we switch the parser implementation to
use kmap_atomic instead of vmap.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 68 ++
 1 file changed, 68 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index 004c3bf..455bfbf 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -136,6 +136,60 @@ static int exec_batch(int fd, uint32_t cmd_bo, uint32_t 
*cmds,
return 1;
 }
 
+static int exec_split_batch(int fd, uint32_t *cmds,
+   int size, int ring, int expected_ret)
+{
+   struct drm_i915_gem_execbuffer2 execbuf;
+   struct drm_i915_gem_exec_object2 objs[1];
+   uint32_t cmd_bo;
+   uint32_t noop[1024] = { 0 };
+   int ret;
+
+   // Allocate and fill a 2-page batch with noops
+   cmd_bo = gem_create(fd, 4096 * 2);
+   gem_write(fd, cmd_bo, 0, noop, sizeof(noop));
+   gem_write(fd, cmd_bo, 4096, noop, sizeof(noop));
+
+   // Write the provided commands such that the first dword
+   // of the command buffer is the last dword of the first
+   // page (i.e. the command is split across the two pages).
+   gem_write(fd, cmd_bo, 4096-sizeof(uint32_t), cmds, size);
+
+   objs[0].handle = cmd_bo;
+   objs[0].relocation_count = 0;
+   objs[0].relocs_ptr = 0;
+   objs[0].alignment = 0;
+   objs[0].offset = 0;
+   objs[0].flags = 0;
+   objs[0].rsvd1 = 0;
+   objs[0].rsvd2 = 0;
+
+   execbuf.buffers_ptr = (uintptr_t)objs;
+   execbuf.buffer_count = 1;
+   execbuf.batch_start_offset = 0;
+   execbuf.batch_len = size;
+   execbuf.cliprects_ptr = 0;
+   execbuf.num_cliprects = 0;
+   execbuf.DR1 = 0;
+   execbuf.DR4 = 0;
+   execbuf.flags = ring;
+   i915_execbuffer2_set_context_id(execbuf, 0);
+   execbuf.rsvd2 = 0;
+
+   ret = drmIoctl(fd,
+  DRM_IOCTL_I915_GEM_EXECBUFFER2,
+  execbuf);
+   if (ret == 0)
+   igt_assert(expected_ret == 0);
+   else
+   igt_assert(-errno == expected_ret);
+
+   gem_sync(fd, cmd_bo);
+   gem_close(fd, cmd_bo);
+
+   return 1;
+}
+
 uint32_t handle;
 int fd;
 
@@ -266,6 +320,20 @@ igt_main
  -EINVAL));
}
 
+   igt_subtest(cmd-crossing-page) {
+   uint32_t lri_ok[] = {
+   MI_LOAD_REGISTER_IMM,
+   0x5280, // allowed register address (SO_WRITE_OFFSET[0])
+   0x1,
+   MI_BATCH_BUFFER_END,
+   };
+   igt_assert(
+  exec_split_batch(fd,
+   lri_ok, sizeof(lri_ok),
+   I915_EXEC_RENDER,
+   0));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/6] tests/gem_exec_parse: Add tests for rejected commands

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 81 ++
 1 file changed, 81 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index c71e478..ebf7116 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -93,9 +93,55 @@ static int exec_batch_patched(int fd, uint32_t cmd_bo, 
uint32_t *cmds,
return 1;
 }
 
+static int exec_batch(int fd, uint32_t cmd_bo, uint32_t *cmds,
+ int size, int ring, int expected_ret)
+{
+   struct drm_i915_gem_execbuffer2 execbuf;
+   struct drm_i915_gem_exec_object2 objs[1];
+   int ret;
+
+   gem_write(fd, cmd_bo, 0, cmds, size);
+
+   objs[0].handle = cmd_bo;
+   objs[0].relocation_count = 0;
+   objs[0].relocs_ptr = 0;
+   objs[0].alignment = 0;
+   objs[0].offset = 0;
+   objs[0].flags = 0;
+   objs[0].rsvd1 = 0;
+   objs[0].rsvd2 = 0;
+
+   execbuf.buffers_ptr = (uintptr_t)objs;
+   execbuf.buffer_count = 1;
+   execbuf.batch_start_offset = 0;
+   execbuf.batch_len = size;
+   execbuf.cliprects_ptr = 0;
+   execbuf.num_cliprects = 0;
+   execbuf.DR1 = 0;
+   execbuf.DR4 = 0;
+   execbuf.flags = ring;
+   i915_execbuffer2_set_context_id(execbuf, 0);
+   execbuf.rsvd2 = 0;
+
+   ret = drmIoctl(fd,
+  DRM_IOCTL_I915_GEM_EXECBUFFER2,
+  execbuf);
+   if (ret == 0)
+   igt_assert(expected_ret == 0);
+   else
+   igt_assert(-errno == expected_ret);
+
+   gem_sync(fd, cmd_bo);
+
+   return 1;
+}
+
 uint32_t handle;
 int fd;
 
+#define MI_ARB_ON_OFF (0x8  23)
+#define MI_DISPLAY_FLIP ((0x14  23) | 1)
+
 #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2)
 #define   PIPE_CONTROL_QW_WRITE(114)
 
@@ -132,6 +178,41 @@ igt_main
   0x1200));
}
 
+   igt_subtest(basic-rejected) {
+   uint32_t arb_on_off[] = {
+   MI_ARB_ON_OFF,
+   MI_BATCH_BUFFER_END,
+   };
+   uint32_t display_flip[] = {
+   MI_DISPLAY_FLIP,
+   0, 0, 0,
+   MI_BATCH_BUFFER_END,
+   0
+   };
+   igt_assert(
+  exec_batch(fd, handle,
+ arb_on_off, sizeof(arb_on_off),
+ I915_EXEC_RENDER,
+ -EINVAL));
+   igt_assert(
+  exec_batch(fd, handle,
+ arb_on_off, sizeof(arb_on_off),
+ I915_EXEC_BSD,
+ -EINVAL));
+   if (gem_has_vebox(fd)) {
+   igt_assert(
+  exec_batch(fd, handle,
+ arb_on_off, sizeof(arb_on_off),
+ I915_EXEC_VEBOX,
+ -EINVAL));
+   }
+   igt_assert(
+  exec_batch(fd, handle,
+ display_flip, sizeof(display_flip),
+ I915_EXEC_BLT,
+ -EINVAL));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 02/13] drm/i915: Implement command buffer parsing logic

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The command parser scans batch buffers submitted via execbuffer ioctls before
the driver submits them to hardware. At a high level, it looks for several
things:

1) Commands which are explicitly defined as privileged or which should only be
   used by the kernel driver. The parser generally rejects such commands, with
   the provision that it may allow some from the drm master process.
2) Commands which access registers. To support correct/enhanced userspace
   functionality, particularly certain OpenGL extensions, the parser provides a
   whitelist of registers which userspace may safely access (for both normal and
   drm master processes).
3) Commands which access privileged memory (i.e. GGTT, HWS page, etc). The
   parser always rejects such commands.

See the overview comment in the source for more details.

This patch only implements the logic. Subsequent patches will build the tables
that drive the parser.

v2: Don't set the secure bit if the parser succeeds
Fail harder during init
Makefile cleanup
Kerneldoc cleanup
Clarify module param description
Convert ints to bools in a few places
Move client/subclient defs to i915_reg.h
Remove the bits_count field

OTC-Tracker: AXIA-4631
Change-Id: I50b98c71c6655893291c78a2d1b8954577b37a30
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_cmd_parser.c | 485 +
 drivers/gpu/drm/i915/i915_drv.h|  93 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  18 ++
 drivers/gpu/drm/i915/i915_params.c |   5 +
 drivers/gpu/drm/i915/i915_reg.h|  12 +
 drivers/gpu/drm/i915/intel_ringbuffer.c|   2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h|  32 ++
 8 files changed, 648 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_cmd_parser.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 4850494..3569122 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
  i915_gem_gtt.o \
  i915_gem_stolen.o \
  i915_gem_tiling.o \
+ i915_cmd_parser.o \
  i915_params.o \
  i915_sysfs.o \
  i915_trace_points.o \
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
new file mode 100644
index 000..7a5756e
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Brad Volkin bradley.d.vol...@intel.com
+ *
+ */
+
+#include i915_drv.h
+
+/**
+ * DOC: i915 batch buffer command parser
+ *
+ * Motivation:
+ * Certain OpenGL features (e.g. transform feedback, performance monitoring)
+ * require userspace code to submit batches containing commands such as
+ * MI_LOAD_REGISTER_IMM to access various registers. Unfortunately, some
+ * generations of the hardware will noop these commands in unsecure batches
+ * (which includes all userspace batches submitted via i915) even though the
+ * commands may be safe and represent the intended programming model of the
+ * device.
+ *
+ * The software command parser is similar in operation to the command parsing
+ * done in hardware for unsecure batches. However, the software parser allows
+ * some operations that would be noop'd by hardware, if the parser determines
+ * the operation is safe, and submits the batch as secure to prevent hardware
+ * parsing.
+ *
+ * Threats:
+ * At a high level, the hardware (and software) checks attempt to prevent
+ * granting userspace undue privileges. There are three categories of 
privilege.
+ *
+ * First, commands which are explicitly defined as privileged or which should
+ * only be used by the kernel driver. The 

[Intel-gfx] [PATCH 1/6] tests: Add a test for the command parser

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Start with a simple testcase that should pass.

v2: Switch to I915_PARAM_CMD_PARSER_VERSION

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/.gitignore   |   1 +
 tests/Makefile.sources |   1 +
 tests/gem_exec_parse.c | 140 +
 3 files changed, 142 insertions(+)
 create mode 100644 tests/gem_exec_parse.c

diff --git a/tests/.gitignore b/tests/.gitignore
index cb548a8..8b0b790 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -35,6 +35,7 @@ gem_exec_blt
 gem_exec_faulting_reloc
 gem_exec_lut_handle
 gem_exec_nop
+gem_exec_parse
 gem_fd_exhaustion
 gem_fenced_exec_thrash
 gem_fence_thrash
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index afb2582..2475f7e 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -29,6 +29,7 @@ TESTS_progs_M = \
gem_exec_bad_domains \
gem_exec_faulting_reloc \
gem_exec_nop \
+   gem_exec_parse \
gem_fenced_exec_thrash \
gem_fence_thrash \
gem_flink \
diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
new file mode 100644
index 000..c71e478
--- /dev/null
+++ b/tests/gem_exec_parse.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include stdlib.h
+#include stdint.h
+#include stdio.h
+#include drm.h
+#include i915_drm.h
+#include drmtest.h
+
+#ifndef I915_PARAM_CMD_PARSER_VERSION
+#define I915_PARAM_CMD_PARSER_VERSION   28
+#endif
+
+static int exec_batch_patched(int fd, uint32_t cmd_bo, uint32_t *cmds,
+ int size, int patch_offset, uint64_t 
expected_value)
+{
+   struct drm_i915_gem_execbuffer2 execbuf;
+   struct drm_i915_gem_exec_object2 objs[2];
+   struct drm_i915_gem_relocation_entry reloc[1];
+
+   uint32_t target_bo = gem_create(fd, 4096);
+   uint64_t actual_value = 0;
+
+   gem_write(fd, cmd_bo, 0, cmds, size);
+
+   reloc[0].offset = patch_offset;
+   reloc[0].delta = 0;
+   reloc[0].target_handle = target_bo;
+   reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+   reloc[0].presumed_offset = 0;
+
+   objs[0].handle = target_bo;
+   objs[0].relocation_count = 0;
+   objs[0].relocs_ptr = 0;
+   objs[0].alignment = 0;
+   objs[0].offset = 0;
+   objs[0].flags = 0;
+   objs[0].rsvd1 = 0;
+   objs[0].rsvd2 = 0;
+
+   objs[1].handle = cmd_bo;
+   objs[1].relocation_count = 1;
+   objs[1].relocs_ptr = (uintptr_t)reloc;
+   objs[1].alignment = 0;
+   objs[1].offset = 0;
+   objs[1].flags = 0;
+   objs[1].rsvd1 = 0;
+   objs[1].rsvd2 = 0;
+
+   execbuf.buffers_ptr = (uintptr_t)objs;
+   execbuf.buffer_count = 2;
+   execbuf.batch_start_offset = 0;
+   execbuf.batch_len = size;
+   execbuf.cliprects_ptr = 0;
+   execbuf.num_cliprects = 0;
+   execbuf.DR1 = 0;
+   execbuf.DR4 = 0;
+   execbuf.flags = I915_EXEC_RENDER;
+   i915_execbuffer2_set_context_id(execbuf, 0);
+   execbuf.rsvd2 = 0;
+
+   gem_execbuf(fd, execbuf);
+   gem_sync(fd, cmd_bo);
+
+   gem_read(fd,target_bo, 0, actual_value, sizeof(actual_value));
+   igt_assert(expected_value == actual_value);
+
+   gem_close(fd, target_bo);
+
+   return 1;
+}
+
+uint32_t handle;
+int fd;
+
+#define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2)
+#define   PIPE_CONTROL_QW_WRITE(114)
+
+igt_main
+{
+   igt_fixture {
+   int parser_version = 0;
+drm_i915_getparam_t gp;
+   int rc;
+
+   fd = drm_open_any();
+
+   gp.param = I915_PARAM_CMD_PARSER_VERSION;
+   gp.value = parser_version;
+   rc = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, 

[Intel-gfx] [PATCH 4/6] tests/gem_exec_parse: Add tests for bitmask checks

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index 48fde25..9e90408 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -145,6 +145,7 @@ int fd;
 
 #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2)
 #define   PIPE_CONTROL_QW_WRITE(114)
+#define   PIPE_CONTROL_LRI_POST_OP (123)
 
 igt_main
 {
@@ -239,6 +240,23 @@ igt_main
  0));
}
 
+   igt_subtest(bitmasks) {
+   uint32_t pc[] = {
+   GFX_OP_PIPE_CONTROL,
+   (PIPE_CONTROL_QW_WRITE |
+PIPE_CONTROL_LRI_POST_OP),
+   0, // To be patched
+   0x1200,
+   0,
+   MI_BATCH_BUFFER_END,
+   };
+   igt_assert(
+  exec_batch(fd, handle,
+ pc, sizeof(pc),
+ I915_EXEC_RENDER,
+ -EINVAL));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 03/13] drm/i915: Initial command parser table definitions

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Add command tables defining irregular length commands for each ring.
This requires a few new command opcode definitions.

v2: Whitespace adjustment in command definitions, sparse fix for !F

OTC-Tracker: AXIA-4631
Change-Id: I064bceb457e15f46928058352afe76d918c58ef5
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 157 +
 drivers/gpu/drm/i915/i915_reg.h|  46 ++
 2 files changed, 203 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 7a5756e..12241e8 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -86,6 +86,148 @@
  * general bitmasking mechanism.
  */
 
+#define STD_MI_OPCODE_MASK  0xFF80
+#define STD_3D_OPCODE_MASK  0x
+#define STD_2D_OPCODE_MASK  0xFFC0
+#define STD_MFX_OPCODE_MASK 0x
+
+#define CMD(op, opm, f, lm, fl, ...)   \
+   {   \
+   .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
+   .cmd = { (op), (opm) }, \
+   .length = { (lm) }, \
+   __VA_ARGS__ \
+   }
+
+/* Convenience macros to compress the tables */
+#define SMI STD_MI_OPCODE_MASK
+#define S3D STD_3D_OPCODE_MASK
+#define S2D STD_2D_OPCODE_MASK
+#define SMFX STD_MFX_OPCODE_MASK
+#define F true
+#define S CMD_DESC_SKIP
+#define R CMD_DESC_REJECT
+#define W CMD_DESC_REGISTER
+#define B CMD_DESC_BITMASK
+#define M CMD_DESC_MASTER
+
+/*Command  Mask   Fixed Len   Action
+ -- */
+static const struct drm_i915_cmd_descriptor common_cmds[] = {
+   CMD(  MI_NOOP,  SMI,F,  1,  S  ),
+   CMD(  MI_USER_INTERRUPT,SMI,F,  1,  S  ),
+   CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  S  ),
+   CMD(  MI_ARB_CHECK, SMI,F,  1,  S  ),
+   CMD(  MI_REPORT_HEAD,   SMI,F,  1,  S  ),
+   CMD(  MI_SUSPEND_FLUSH, SMI,F,  1,  S  ),
+   CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
+};
+
+static const struct drm_i915_cmd_descriptor render_cmds[] = {
+   CMD(  MI_FLUSH, SMI,F,  1,  S  ),
+   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  S  ),
+   CMD(  MI_PREDICATE, SMI,F,  1,  S  ),
+   CMD(  MI_TOPOLOGY_FILTER,   SMI,F,  1,  S  ),
+   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_URB_CLEAR, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  S  ),
+   CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
+   CMD(  GFX_OP_3DSTATE_VF_STATISTICS, S3D,F,  1,  S  ),
+   CMD(  PIPELINE_SELECT,  S3D,F,  1,  S  ),
+   CMD(  GPGPU_OBJECT, S3D,   !F,  0xFF,   S  ),
+   CMD(  GPGPU_WALKER, S3D,   !F,  0xFF,   S  ),
+   CMD(  GFX_OP_3DSTATE_SO_DECL_LIST,  S3D,   !F,  0x1FF,  S  ),
+};
+
+static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
+   CMD(  MI_SET_PREDICATE, SMI,F,  1,  S  ),
+   CMD(  MI_RS_CONTROL,SMI,F,  1,  S  ),
+   CMD(  MI_URB_ATOMIC_ALLOC,  SMI,F,  1,  S  ),
+   CMD(  MI_RS_CONTEXT,SMI,F,  1,  S  ),
+   CMD(  MI_LOAD_REGISTER_REG, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_RS_STORE_DATA_IMM, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_LOAD_URB_MEM,  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_STORE_URB_MEM, SMI,   !F,  0xFF,   S  ),
+   CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_VS,  S3D,   !F,  0x7FF,  S  ),
+   CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_PS,  S3D,   !F,  0x7FF,  S  ),
+
+   CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS,  S3D,   !F,  0x1FF,  S  ),
+   CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS,  S3D,   !F,  0x1FF,  S  ),
+   CMD(  

[Intel-gfx] [PATCH] intel: Merge i915_drm.h with cmd parser define

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 include/drm/i915_drm.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 2f4eb8c..ba863c4 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -27,7 +27,7 @@
 #ifndef _I915_DRM_H_
 #define _I915_DRM_H_
 
-#include drm.h
+#include drm/drm.h
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
@@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_NO_RELOC25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT   27
+#define I915_PARAM_CMD_PARSER_VERSION   28
 
 typedef struct drm_i915_getparam {
int param;
@@ -721,7 +722,7 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_IS_PINNED(110)
 
-/** Provide a hint to the kernel that the command stream and auxilliary
+/** Provide a hint to the kernel that the command stream and auxiliary
  * state buffers already holds the correct presumed addresses and so the
  * relocation process may be skipped if no buffers need to be moved in
  * preparation for the execbuffer.
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 10/13] drm/i915: Enable PPGTT command parser checks

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Various commands that access memory have a bit to determine whether
the graphics address specified in the command should use the GGTT or
PPGTT for translation. These checks ensure that the bit indicates
PPGTT translation.

Most of these checks use the existing bit-checking infrastructure.
The PIPE_CONTROL and MI_FLUSH_DW commands, however, are multi-function
commands. The GGTT/PPGTT bit is only relevant for certain uses of the
command. As such, this change also extends the bit-checking code to
include a condition mask and offset. If the condition mask is non-zero
then the parser only performs the bit check when the bits specified by
the condition mask/offset are also non-zero.

NOTE: At this point in the series PPGTT must be enabled for the parser
to work correctly. If it's not enabled, userspace will not be setting
the PPGTT bits the way the parser requires. VLV is the only platform
where this is a problem, so at this point, we disable parsing for VLV.

v2: whitespace and trailing commas fixes, rebased

OTC-Tracker: AXIA-4631
Change-Id: I3f4c76b6734f1956ec47e698230f97d0998ff92b
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 128 ++---
 drivers/gpu/drm/i915/i915_drv.h|   6 ++
 drivers/gpu/drm/i915/i915_reg.h|   6 ++
 3 files changed, 129 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 0351df1..1528549 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -124,10 +124,20 @@ static const struct drm_i915_cmd_descriptor common_cmds[] 
= {
CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   R  ),
CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   W,
  .reg = { .offset = 1, .mask = 0x007C }   ),
-   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   W,
- .reg = { .offset = 1, .mask = 0x007C }   ),
-   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   W,
- .reg = { .offset = 1, .mask = 0x007C }   ),
+   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   W | B,
+ .reg = { .offset = 1, .mask = 0x007C },
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0,
+ }},  ),
+   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   W | B,
+ .reg = { .offset = 1, .mask = 0x007C },
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0,
+ }},  ),
CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
 };
 
@@ -139,9 +149,31 @@ static const struct drm_i915_cmd_descriptor render_cmds[] 
= {
CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   R  ),
CMD(  MI_URB_CLEAR, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3F,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0,
+ }},  ),
CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  S  ),
-   CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0,
+ }},  ),
+   CMD(  MI_REPORT_PERF_COUNT, SMI,   !F,  0x3F,   B,
+ .bits = {{
+   .offset = 1,
+   .mask = MI_REPORT_PERF_COUNT_GGTT,
+   .expected = 0,
+ }},  ),
+   CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0,
+ }},  ),
CMD(  GFX_OP_3DSTATE_VF_STATISTICS, S3D,F,  1,  S  ),
CMD(  PIPELINE_SELECT,  S3D,F,  1,  S  ),
CMD(  MEDIA_VFE_STATE,  S3D,   !F,  0x, B,
@@ -158,6 +190,13 @@ static const 

[Intel-gfx] [PATCH 01/13] drm/i915: Refactor shmem pread setup

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The command parser is going to need the same synchronization and
setup logic, so factor it out for reuse.

v2: Add a check that the object is backed by shmem

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |  3 +++
 drivers/gpu/drm/i915/i915_gem.c | 51 ++---
 2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8c64831..582035b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2097,6 +2097,9 @@ void i915_gem_release_all_mmaps(struct drm_i915_private 
*dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
 
+int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
+   int *needs_clflush);
+
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object 
*obj, int n)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3618bb0..83990cb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -326,6 +326,42 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
return 0;
 }
 
+/*
+ * Pins the specified object's pages and synchronizes the object with
+ * GPU accesses. Sets needs_clflush to non-zero if the caller should
+ * flush the object from the CPU cache.
+ */
+int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
+   int *needs_clflush)
+{
+   int ret;
+
+   *needs_clflush = 0;
+
+   if (!obj-base.filp)
+   return -EINVAL;
+
+   if (!(obj-base.read_domains  I915_GEM_DOMAIN_CPU)) {
+   /* If we're not in the cpu read domain, set ourself into the gtt
+* read domain and manually flush cachelines (if required). This
+* optimizes for the case when the gpu will dirty the data
+* anyway again before the next pread happens. */
+   *needs_clflush = !cpu_cache_is_coherent(obj-base.dev,
+   obj-cache_level);
+   ret = i915_gem_object_wait_rendering(obj, true);
+   if (ret)
+   return ret;
+   }
+
+   ret = i915_gem_object_get_pages(obj);
+   if (ret)
+   return ret;
+
+   i915_gem_object_pin_pages(obj);
+
+   return ret;
+}
+
 /* Per-page copy function for the shmem pread fastpath.
  * Flushes invalid cachelines before reading the target if
  * needs_clflush is set. */
@@ -423,23 +459,10 @@ i915_gem_shmem_pread(struct drm_device *dev,
 
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 
-   if (!(obj-base.read_domains  I915_GEM_DOMAIN_CPU)) {
-   /* If we're not in the cpu read domain, set ourself into the gtt
-* read domain and manually flush cachelines (if required). This
-* optimizes for the case when the gpu will dirty the data
-* anyway again before the next pread happens. */
-   needs_clflush = !cpu_cache_is_coherent(dev, obj-cache_level);
-   ret = i915_gem_object_wait_rendering(obj, true);
-   if (ret)
-   return ret;
-   }
-
-   ret = i915_gem_object_get_pages(obj);
+   ret = i915_gem_obj_prepare_shmem_read(obj, needs_clflush);
if (ret)
return ret;
 
-   i915_gem_object_pin_pages(obj);
-
offset = args-offset;
 
for_each_sg_page(obj-pages-sgl, sg_iter, obj-pages-nents,
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 08/13] drm/i915: Enable register whitelist checks

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

MI_STORE_REGISTER_MEM, MI_LOAD_REGISTER_MEM, and MI_LOAD_REGISTER_IMM
commands allow userspace access to registers. Only certain registers
should be allowed for such access, so enable checking for those commands.
Each ring gets its own register whitelist.

MI_LOAD_REGISTER_REG on HSW also allows register access but is currently
unused by userspace components. Leave it rejected.

PIPE_CONTROL and MEDIA_VFE_STATE allow register access based on certain
bits being set. Reject those commands when those bits are set.

v2: trailing commas, rebased

OTC-Tracker: AXIA-4631
Change-Id: Ie614a2f0eb2e5917de809e5a17957175d24cc44f
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 21 ++---
 drivers/gpu/drm/i915/i915_reg.h|  3 +++
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 353e5cf..4f14a24 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -122,9 +122,12 @@ static const struct drm_i915_cmd_descriptor common_cmds[] 
= {
CMD(  MI_SUSPEND_FLUSH, SMI,F,  1,  S  ),
CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   R  ),
CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   W,
+ .reg = { .offset = 1, .mask = 0x007C }   ),
+   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   W,
+ .reg = { .offset = 1, .mask = 0x007C }   ),
+   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   W,
+ .reg = { .offset = 1, .mask = 0x007C }   ),
CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
 };
 
@@ -141,9 +144,21 @@ static const struct drm_i915_cmd_descriptor render_cmds[] 
= {
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
CMD(  GFX_OP_3DSTATE_VF_STATISTICS, S3D,F,  1,  S  ),
CMD(  PIPELINE_SELECT,  S3D,F,  1,  S  ),
+   CMD(  MEDIA_VFE_STATE,  S3D,   !F,  0x, B,
+ .bits = {{
+   .offset = 2,
+   .mask = MEDIA_VFE_STATE_MMIO_ACCESS_MASK,
+   .expected = 0,
+ }},  ),
CMD(  GPGPU_OBJECT, S3D,   !F,  0xFF,   S  ),
CMD(  GPGPU_WALKER, S3D,   !F,  0xFF,   S  ),
CMD(  GFX_OP_3DSTATE_SO_DECL_LIST,  S3D,   !F,  0x1FF,  S  ),
+   CMD(  GFX_OP_PIPE_CONTROL(5),   S3D,   !F,  0xFF,   B,
+ .bits = {{
+   .offset = 1,
+   .mask = PIPE_CONTROL_MMIO_WRITE,
+   .expected = 0,
+ }},  ),
 };
 
 static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 87523df..11cca96 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -330,6 +330,7 @@
 #define   DISPLAY_PLANE_B   (120)
 #define GFX_OP_PIPE_CONTROL(len)   ((0x329)|(0x327)|(0x224)|(len-2))
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB  (124) /* gen7+ */
+#define   PIPE_CONTROL_MMIO_WRITE  (123)
 #define   PIPE_CONTROL_CS_STALL(120)
 #define   PIPE_CONTROL_TLB_INVALIDATE  (118)
 #define   PIPE_CONTROL_QW_WRITE(114)
@@ -370,6 +371,8 @@
 
 #define PIPELINE_SELECT
((0x329)|(0x127)|(0x124)|(0x416))
 #define GFX_OP_3DSTATE_VF_STATISTICS   
((0x329)|(0x127)|(0x024)|(0xB16))
+#define MEDIA_VFE_STATE
((0x329)|(0x227)|(0x024)|(0x016))
+#define  MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
 #define GPGPU_OBJECT   
((0x329)|(0x227)|(0x124)|(0x416))
 #define GPGPU_WALKER   
((0x329)|(0x227)|(0x124)|(0x516))
 #define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 11/13] drm/i915: Reject commands that would store to global HWS page

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

PIPE_CONTROL and MI_FLUSH_DW have bits that would write to the
hardware status page. The driver stores request tracking info
there, so don't let userspace overwrite it.

v2: trailing comma fix, rebased

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 24 +++-
 drivers/gpu/drm/i915/i915_reg.h|  1 +
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 1528549..f9aa01a 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -193,7 +193,8 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = 
{
  },
  {
.offset = 1,
-   .mask = PIPE_CONTROL_GLOBAL_GTT_IVB,
+   .mask = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+PIPE_CONTROL_STORE_DATA_INDEX),
.expected = 0,
.condition_offset = 1,
.condition_mask = PIPE_CONTROL_POST_SYNC_OP_MASK,
@@ -242,6 +243,13 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = 
{
.expected = 0,
.condition_offset = 0,
.condition_mask = MI_FLUSH_DW_OP_MASK,
+ },
+ {
+   .offset = 0,
+   .mask = MI_FLUSH_DW_STORE_INDEX,
+   .expected = 0,
+   .condition_offset = 0,
+   .condition_mask = MI_FLUSH_DW_OP_MASK,
  }},  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
  .bits = {{
@@ -278,6 +286,13 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
.expected = 0,
.condition_offset = 0,
.condition_mask = MI_FLUSH_DW_OP_MASK,
+ },
+ {
+   .offset = 0,
+   .mask = MI_FLUSH_DW_STORE_INDEX,
+   .expected = 0,
+   .condition_offset = 0,
+   .condition_mask = MI_FLUSH_DW_OP_MASK,
  }},  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
  .bits = {{
@@ -308,6 +323,13 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
.expected = 0,
.condition_offset = 0,
.condition_mask = MI_FLUSH_DW_OP_MASK,
+ },
+ {
+   .offset = 0,
+   .mask = MI_FLUSH_DW_STORE_INDEX,
+   .expected = 0,
+   .condition_offset = 0,
+   .condition_mask = MI_FLUSH_DW_OP_MASK,
  }},  ),
CMD(  COLOR_BLT,S2D,   !F,  0x3F,   S  ),
CMD(  SRC_COPY_BLT, S2D,   !F,  0x3F,   S  ),
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e683b31..46db649 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -335,6 +335,7 @@
 #define GFX_OP_PIPE_CONTROL(len)   ((0x329)|(0x327)|(0x224)|(len-2))
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB  (124) /* gen7+ */
 #define   PIPE_CONTROL_MMIO_WRITE  (123)
+#define   PIPE_CONTROL_STORE_DATA_INDEX(121)
 #define   PIPE_CONTROL_CS_STALL(120)
 #define   PIPE_CONTROL_TLB_INVALIDATE  (118)
 #define   PIPE_CONTROL_QW_WRITE(114)
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 12/13] drm/i915: Add a CMD_PARSER_VERSION getparam

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

So userspace can query the kernel for command parser support.

v2: Add i915_cmd_parser_get_version(), history log, and kerneldoc

OTC-Tracker: AXIA-4631
Change-Id: I58af650db9f6753c2dcac9c54ab432fd31db302f
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 19 +++
 drivers/gpu/drm/i915/i915_dma.c|  3 +++
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 include/uapi/drm/i915_drm.h|  1 +
 4 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index f9aa01a..23c8174 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -897,3 +897,22 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
 
return ret;
 }
+
+/**
+ * i915_cmd_parser_get_version() - get the cmd parser version number
+ *
+ * The cmd parser maintains a simple increasing integer version number suitable
+ * for passing to userspace clients to determine what operations are permitted.
+ *
+ * Return: the current version number of the cmd parser
+ */
+int i915_cmd_parser_get_version(void)
+{
+   /*
+* Command parser version history
+*
+* 1. Initial version. Checks batches and reports violations, but leaves
+*hardware parsing enabled (so does not allow new use cases).
+*/
+   return 1;
+}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 7688abc..14875f5 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1017,6 +1017,9 @@ static int i915_getparam(struct drm_device *dev, void 
*data,
case I915_PARAM_HAS_EXEC_HANDLE_LUT:
value = 1;
break;
+   case I915_PARAM_CMD_PARSER_VERSION:
+   value = i915_cmd_parser_get_version();
+   break;
default:
DRM_DEBUG(Unknown parameter %d\n, param-param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 27a48d9..6294d61 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2582,6 +2582,7 @@ void i915_get_extra_instdone(struct drm_device *dev, 
uint32_t *instdone);
 const char *i915_cache_level_str(int type);
 
 /* i915_cmd_parser.c */
+int i915_cmd_parser_get_version(void);
 void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring);
 bool i915_needs_cmd_parser(struct intel_ring_buffer *ring);
 int i915_parse_cmds(struct intel_ring_buffer *ring,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 126bfaa..8a3e4ef00 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_NO_RELOC25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT   27
+#define I915_PARAM_CMD_PARSER_VERSION   28
 
 typedef struct drm_i915_getparam {
int param;
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 04/13] drm/i915: Reject privileged commands

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The spec defines most of these commands as privileged. A few others,
like the semaphore mbox command and some display commands, are also
reserved for the driver's use. Subsequent patches relax some of
these restrictions.

v2: Rebased

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 54 --
 drivers/gpu/drm/i915/i915_reg.h|  1 +
 2 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 12241e8..90bbb6d 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -116,27 +116,27 @@
 static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD(  MI_NOOP,  SMI,F,  1,  S  ),
CMD(  MI_USER_INTERRUPT,SMI,F,  1,  S  ),
-   CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  S  ),
+   CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  R  ),
CMD(  MI_ARB_CHECK, SMI,F,  1,  S  ),
CMD(  MI_REPORT_HEAD,   SMI,F,  1,  S  ),
CMD(  MI_SUSPEND_FLUSH, SMI,F,  1,  S  ),
-   CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   R  ),
CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
 };
 
 static const struct drm_i915_cmd_descriptor render_cmds[] = {
CMD(  MI_FLUSH, SMI,F,  1,  S  ),
-   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  S  ),
+   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
CMD(  MI_PREDICATE, SMI,F,  1,  S  ),
CMD(  MI_TOPOLOGY_FILTER,   SMI,F,  1,  S  ),
-   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   R  ),
CMD(  MI_URB_CLEAR, SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   R  ),
CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  S  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
CMD(  GFX_OP_3DSTATE_VF_STATISTICS, S3D,F,  1,  S  ),
@@ -151,7 +151,9 @@ static const struct drm_i915_cmd_descriptor 
hsw_render_cmds[] = {
CMD(  MI_RS_CONTROL,SMI,F,  1,  S  ),
CMD(  MI_URB_ATOMIC_ALLOC,  SMI,F,  1,  S  ),
CMD(  MI_RS_CONTEXT,SMI,F,  1,  S  ),
-   CMD(  MI_LOAD_REGISTER_REG, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_LOAD_SCAN_LINES_EXCL,  SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_LOAD_REGISTER_REG, SMI,   !F,  0xFF,   R  ),
CMD(  MI_RS_STORE_DATA_IMM, SMI,   !F,  0xFF,   S  ),
CMD(  MI_LOAD_URB_MEM,  SMI,   !F,  0xFF,   S  ),
CMD(  MI_STORE_URB_MEM, SMI,   !F,  0xFF,   S  ),
@@ -166,8 +168,9 @@ static const struct drm_i915_cmd_descriptor 
hsw_render_cmds[] = {
 };
 
 static const struct drm_i915_cmd_descriptor video_cmds[] = {
-   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  S  ),
+   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_UPDATE_GTT,SMI,   !F,  0x3F,   R  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
/*
 * MFX_WAIT doesn't fit the way we handle length for most commands.
@@ -178,18 +181,25 @@ static const struct drm_i915_cmd_descriptor video_cmds[] 
= {
 };
 
 static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
-   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  S  ),
+ 

[Intel-gfx] [PATCH 5/6] tests/gem_exec_parse: Test for batches w/o MI_BATCH_BUFFER_END

2014-02-18 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index 9e90408..004c3bf 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -257,6 +257,15 @@ igt_main
  -EINVAL));
}
 
+   igt_subtest(batch-without-end) {
+   uint32_t noop[1024] = { 0 };
+   igt_assert(
+  exec_batch(fd, handle,
+ noop, sizeof(noop),
+ I915_EXEC_RENDER,
+ -EINVAL));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 00/13] Gen7 batch buffer command parser

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Certain OpenGL features (e.g. transform feedback, performance monitoring)
require userspace code to submit batches containing commands such as
MI_LOAD_REGISTER_IMM to access various registers. Unfortunately, some
generations of the hardware will noop these commands in unsecure batches
(which includes all userspace batches submitted via i915) even though the
commands may be safe and represent the intended programming model of the device.

This series introduces a software command parser similar in operation to the
command parsing done in hardware for unsecure batches. However, the software
parser allows some operations that would be noop'd by hardware, if the parser
determines the operation is safe, and submits the batch as secure to prevent
hardware parsing. Currently the series implements this on IVB and HSW.

The series has one piece of prep work, one patch for the parser logic, and a
handful of patches to fill out the tables which drive the parser. There are
follow-up patches to libdrm and to i-g-t. The i-g-t tests are basic and do not
test all of the commands used by the parser on the assumption that I'm likely
to make the same mistakes in both the parser and the test.

WARNING!!!
I've previously run the i-g-t gem_* tests, the piglit quick tests, and generally
used Ubuntu 13.10 IVB and HSW systems with the parser running. Aside from a
failure described below, I did not see any regressions. However, the series
currently hits a BUG_ON() if you enable the parser due to a regression in secure
batch handling on -nightly.

At this point there are a couple of required/potential improvements.

1) Chained batches. The parser currently allows MI_BATCH_BUFFER_START commands
   in userspace batches without parsing them. The media driver uses chained
   batches, so a solution is required. I'm still working through the
   requirements but don't want to continue delaying the review process for what
   I have so far.
2) Command buffer copy. To avoid CPU modifications to buffers after parsing, and
   to avoid GPU modifications to buffers via EUs or commands in the batch, we
   should copy the userspace batch buffer to memory that userspace does not
   have access to, map it into GGTT, and execute that batch buffer. I have a
   sense of how to do this for 1st-level batches, but it may need changes to
   tie in with the chained batch parsing, so I've again held off.
3) Coherency. I've found a coherency issue on VLV when reading the batch buffer
   from the CPU during execbuffer2. Userspace writes the batch via pwrite fast
   path before calling execbuffer2. The parser reads stale data. This works fine
   on IVB and HSW, so I believe it's an LLC vs. non-LLC issue. I'm just unclear
   on what the correct flushing or synchronization is for this scenario. This
   only matters if we get PPGTT working on VLV and enable the parser there.

v2:
- Significantly reorder series
- Scan secure batches (i.e. I915_EXEC_SECURE)
- Check that parser tables are sorted during init
- Fixed gem_cpu_reloc regression
- HAS_CMD_PARSER -> CMD_PARSER_VERSION getparam
- Additional tests

Brad Volkin (13):
  drm/i915: Refactor shmem pread setup
  drm/i915: Implement command buffer parsing logic
  drm/i915: Initial command parser table definitions
  drm/i915: Reject privileged commands
  drm/i915: Allow some privileged commands from master
  drm/i915: Add register whitelists for mesa
  drm/i915: Add register whitelist for DRM master
  drm/i915: Enable register whitelist checks
  drm/i915: Reject commands that explicitly generate interrupts
  drm/i915: Enable PPGTT command parser checks
  drm/i915: Reject commands that would store to global HWS page
  drm/i915: Add a CMD_PARSER_VERSION getparam
  drm/i915: Enable command parsing by default

 drivers/gpu/drm/i915/Makefile  |   3 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c | 845 +
 drivers/gpu/drm/i915/i915_dma.c|   4 +
 drivers/gpu/drm/i915/i915_drv.h| 103 
 drivers/gpu/drm/i915/i915_gem.c|  48 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  17 +
 drivers/gpu/drm/i915/i915_params.c |   5 +
 drivers/gpu/drm/i915/i915_reg.h|  78 +++
 drivers/gpu/drm/i915/intel_ringbuffer.c|   2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h|  32 ++
 include/uapi/drm/i915_drm.h|   1 +
 11 files changed, 1123 insertions(+), 15 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_cmd_parser.c

-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 07/13] drm/i915: Add register whitelist for DRM master

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

These are used to implement scanline waits in the X server.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 18d5b05..296e322 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -234,6 +234,20 @@ static const u32 gen7_blt_regs[] = {
BCS_SWCTRL,
 };
 
+/* Whitelists for the DRM master. Magic numbers are taken from sna, to match. 
*/
+static const u32 ivb_master_regs[] = {
+   0xa188, /* FORCEWAKE_MT */
+   0x44050, /* DERRMR */
+   0x70068,
+   0x71068,
+   0x72068,
+};
+
+static const u32 hsw_master_regs[] = {
+   0xa188, /* FORCEWAKE_MT */
+   0x44050, /* DERRMR */
+};
+
 #define CLIENT_MASK  0xE000
 #define SUBCLIENT_MASK   0x1800
 #define MI_CLIENT0x
@@ -365,6 +379,14 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring)
ring-reg_table = gen7_render_regs;
ring-reg_count = ARRAY_SIZE(gen7_render_regs);
 
+   if (IS_HASWELL(ring-dev)) {
+   ring-master_reg_table = hsw_master_regs;
+   ring-master_reg_count = ARRAY_SIZE(hsw_master_regs);
+   } else {
+   ring-master_reg_table = ivb_master_regs;
+   ring-master_reg_count = ARRAY_SIZE(ivb_master_regs);
+   }
+
ring-get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VCS:
@@ -384,6 +406,14 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring)
ring-reg_table = gen7_blt_regs;
ring-reg_count = ARRAY_SIZE(gen7_blt_regs);
 
+   if (IS_HASWELL(ring-dev)) {
+   ring-master_reg_table = hsw_master_regs;
+   ring-master_reg_count = ARRAY_SIZE(hsw_master_regs);
+   } else {
+   ring-master_reg_table = ivb_master_regs;
+   ring-master_reg_count = ARRAY_SIZE(ivb_master_regs);
+   }
+
ring-get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VECS:
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 06/13] drm/i915: Add register whitelists for mesa

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

These registers are currently used by mesa for blitting,
transform feedback extensions, and performance monitoring
extensions.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 55 ++
 drivers/gpu/drm/i915/i915_reg.h| 20 +
 2 files changed, 75 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 88456638..18d5b05 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -185,6 +185,55 @@ static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] 
= {
{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
 };
 
+/*
+ * Register whitelists, sorted by increasing register offset.
+ *
+ * Some registers that userspace accesses are 64 bits. The register
+ * access commands only allow 32-bit accesses. Hence, we have to include
+ * entries for both halves of the 64-bit registers.
+ */
+
+static const u32 gen7_render_regs[] = {
+   HS_INVOCATION_COUNT,
+   HS_INVOCATION_COUNT + sizeof(u32),
+   DS_INVOCATION_COUNT,
+   DS_INVOCATION_COUNT + sizeof(u32),
+   IA_VERTICES_COUNT,
+   IA_VERTICES_COUNT + sizeof(u32),
+   IA_PRIMITIVES_COUNT,
+   IA_PRIMITIVES_COUNT + sizeof(u32),
+   VS_INVOCATION_COUNT,
+   VS_INVOCATION_COUNT + sizeof(u32),
+   GS_INVOCATION_COUNT,
+   GS_INVOCATION_COUNT + sizeof(u32),
+   GS_PRIMITIVES_COUNT,
+   GS_PRIMITIVES_COUNT + sizeof(u32),
+   CL_INVOCATION_COUNT,
+   CL_INVOCATION_COUNT + sizeof(u32),
+   CL_PRIMITIVES_COUNT,
+   CL_PRIMITIVES_COUNT + sizeof(u32),
+   PS_INVOCATION_COUNT,
+   PS_INVOCATION_COUNT + sizeof(u32),
+   PS_DEPTH_COUNT,
+   PS_DEPTH_COUNT + sizeof(u32),
+   GEN7_SO_NUM_PRIMS_WRITTEN(0),
+   GEN7_SO_NUM_PRIMS_WRITTEN(0) + sizeof(u32),
+   GEN7_SO_NUM_PRIMS_WRITTEN(1),
+   GEN7_SO_NUM_PRIMS_WRITTEN(1) + sizeof(u32),
+   GEN7_SO_NUM_PRIMS_WRITTEN(2),
+   GEN7_SO_NUM_PRIMS_WRITTEN(2) + sizeof(u32),
+   GEN7_SO_NUM_PRIMS_WRITTEN(3),
+   GEN7_SO_NUM_PRIMS_WRITTEN(3) + sizeof(u32),
+   GEN7_SO_WRITE_OFFSET(0),
+   GEN7_SO_WRITE_OFFSET(1),
+   GEN7_SO_WRITE_OFFSET(2),
+   GEN7_SO_WRITE_OFFSET(3),
+};
+
+static const u32 gen7_blt_regs[] = {
+   BCS_SWCTRL,
+};
+
 #define CLIENT_MASK  0xE000
 #define SUBCLIENT_MASK   0x1800
 #define MI_CLIENT0x
@@ -313,6 +362,9 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring)
ring-cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
}
 
+   ring-reg_table = gen7_render_regs;
+   ring-reg_count = ARRAY_SIZE(gen7_render_regs);
+
ring-get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VCS:
@@ -329,6 +381,9 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring)
ring->cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
}
 
+   ring->reg_table = gen7_blt_regs;
+   ring->reg_count = ARRAY_SIZE(gen7_blt_regs);
+
ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VECS:
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 2b7c26e..b99bacf 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -385,6 +385,26 @@
 #define SRC_COPY_BLT  ((0x2<<29)|(0x43<<22))
 
 /*
+ * Registers used only by the command parser
+ */
+#define BCS_SWCTRL 0x22200
+
+#define HS_INVOCATION_COUNT 0x2300
+#define DS_INVOCATION_COUNT 0x2308
+#define IA_VERTICES_COUNT   0x2310
+#define IA_PRIMITIVES_COUNT 0x2318
+#define VS_INVOCATION_COUNT 0x2320
+#define GS_INVOCATION_COUNT 0x2328
+#define GS_PRIMITIVES_COUNT 0x2330
+#define CL_INVOCATION_COUNT 0x2338
+#define CL_PRIMITIVES_COUNT 0x2340
+#define PS_INVOCATION_COUNT 0x2348
+#define PS_DEPTH_COUNT  0x2350
+
+/* There are the 4 64-bit counter registers, one for each stream output */
+#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
+
+/*
  * Reset registers
  */
 #define DEBUG_RESET_I830   0x6070
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 04/13] drm/i915: Reject privileged commands

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The spec defines most of these commands as privileged. A few others,
like the semaphore mbox command and some display commands, are also
reserved for the driver's use. Subsequent patches relax some of
these restrictions.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 54 --
 drivers/gpu/drm/i915/i915_reg.h| 31 +--
 2 files changed, 54 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 2e27bad..cc2f68c 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -57,27 +57,27 @@
 static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD(  MI_NOOP,  SMI,F,  1,  S  ),
CMD(  MI_USER_INTERRUPT,SMI,F,  1,  S  ),
-   CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  S  ),
+   CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  R  ),
CMD(  MI_ARB_CHECK, SMI,F,  1,  S  ),
CMD(  MI_REPORT_HEAD,   SMI,F,  1,  S  ),
CMD(  MI_SUSPEND_FLUSH, SMI,F,  1,  S  ),
-   CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   R  ),
CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
 };
 
 static const struct drm_i915_cmd_descriptor render_cmds[] = {
CMD(  MI_FLUSH, SMI,F,  1,  S  ),
-   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  S  ),
+   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
CMD(  MI_PREDICATE, SMI,F,  1,  S  ),
CMD(  MI_TOPOLOGY_FILTER,   SMI,F,  1,  S  ),
-   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   R  ),
CMD(  MI_URB_CLEAR, SMI,   !F,  0xFF,   S  ),
-   CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   R  ),
CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  S  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
CMD(  GFX_OP_3DSTATE_VF_STATISTICS, S3D,F,  1,  S  ),
@@ -92,7 +92,9 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] 
= {
CMD(  MI_RS_CONTROL,SMI,F,  1,  S  ),
CMD(  MI_URB_ATOMIC_ALLOC,  SMI,F,  1,  S  ),
CMD(  MI_RS_CONTEXT,SMI,F,  1,  S  ),
-   CMD(  MI_LOAD_REGISTER_REG, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_LOAD_SCAN_LINES_EXCL,  SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_LOAD_REGISTER_REG, SMI,   !F,  0xFF,   R  ),
CMD(  MI_RS_STORE_DATA_IMM, SMI,   !F,  0xFF,   S  ),
CMD(  MI_LOAD_URB_MEM,  SMI,   !F,  0xFF,   S  ),
CMD(  MI_STORE_URB_MEM, SMI,   !F,  0xFF,   S  ),
@@ -107,8 +109,9 @@ static const struct drm_i915_cmd_descriptor 
hsw_render_cmds[] = {
 };
 
 static const struct drm_i915_cmd_descriptor video_cmds[] = {
-   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  S  ),
+   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_UPDATE_GTT,SMI,   !F,  0x3F,   R  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
/*
 * MFX_WAIT doesn't fit the way we handle length for most commands.
@@ -119,18 +122,25 @@ static const struct drm_i915_cmd_descriptor video_cmds[] 
= {
 };
 
 static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
-   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  S  ),
+

[Intel-gfx] [PATCH 09/13] drm/i915: Reject commands that explicitly generate interrupts

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The driver leaves most interrupts masked during normal operation,
so there would have to be additional work to enable userspace to
safely request/receive an interrupt.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 25 +++--
 drivers/gpu/drm/i915/i915_reg.h|  1 +
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 5d3e303..7de7c6a 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -56,7 +56,7 @@
  -- */
 static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD(  MI_NOOP,  SMI,F,  1,  S  ),
-   CMD(  MI_USER_INTERRUPT,SMI,F,  1,  S  ),
+   CMD(  MI_USER_INTERRUPT,SMI,F,  1,  R  ),
CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  M  ),
CMD(  MI_ARB_CHECK, SMI,F,  1,  S  ),
CMD(  MI_REPORT_HEAD,   SMI,F,  1,  S  ),
@@ -98,7 +98,7 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = {
CMD(  GFX_OP_PIPE_CONTROL(5),   S3D,   !F,  0xFF,   B,
  .bits = {{
.offset = 1,
-   .mask = PIPE_CONTROL_MMIO_WRITE,
+   .mask = (PIPE_CONTROL_MMIO_WRITE | PIPE_CONTROL_NOTIFY),
.expected = 0
  }},
  .bits_count = 1  ),
@@ -129,6 +129,13 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = 
{
CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0xFF,   S  ),
CMD(  MI_UPDATE_GTT,SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_FLUSH_DW,  SMI,   !F,  0x3F,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_FLUSH_DW_NOTIFY,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
/*
 * MFX_WAIT doesn't fit the way we handle length for most commands.
@@ -142,6 +149,13 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0xFF,   S  ),
CMD(  MI_UPDATE_GTT,SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_FLUSH_DW,  SMI,   !F,  0x3F,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_FLUSH_DW_NOTIFY,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
 };
 
@@ -149,6 +163,13 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3FF,  S  ),
CMD(  MI_UPDATE_GTT,SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_FLUSH_DW,  SMI,   !F,  0x3F,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_FLUSH_DW_NOTIFY,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
CMD(  COLOR_BLT,S2D,   !F,  0x3F,   S  ),
CMD(  SRC_COPY_BLT, S2D,   !F,  0x3F,   S  ),
 };
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 6592d0d..c2e4898 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -258,6 +258,7 @@
 #define   MI_FLUSH_DW_STORE_INDEX  (1<<21)
 #define   MI_INVALIDATE_TLB        (1<<18)
 #define   MI_FLUSH_DW_OP_STOREDW   (1<<14)
+#define   MI_FLUSH_DW_NOTIFY       (1<<8)
 #define   MI_INVALIDATE_BSD        (1<<7)
 #define   MI_FLUSH_DW_USE_GTT      (1<<2)
 #define   MI_FLUSH_DW_USE_PPGTT    (0<<2)
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 13/13] drm/i915: Enable command parsing by default

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

OTC-Tracker: AXIA-4631
Change-Id: I6747457e1fe7494bd42787af51198fcba398ad78
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_params.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index 6d3d906..981b635 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -47,7 +47,7 @@ struct i915_params i915 __read_mostly = {
.prefault_disable = 0,
.reset = true,
.invert_brightness = 0,
-   .enable_cmd_parser = 0
+   .enable_cmd_parser = 1
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -157,4 +157,4 @@ MODULE_PARM_DESC(invert_brightness,
 
 module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
-   "Enable command parsing (default: false)");
+   "Enable command parsing (default: true)");
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 12/13] drm/i915: Add a CMD_PARSER_VERSION getparam

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

So userspace can query the kernel for command parser support.

OTC-Tracker: AXIA-4631
Change-Id: I58af650db9f6753c2dcac9c54ab432fd31db302f
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c | 4 
 include/uapi/drm/i915_drm.h | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 258b1be..34ba199 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1013,6 +1013,10 @@ static int i915_getparam(struct drm_device *dev, void 
*data,
case I915_PARAM_HAS_EXEC_HANDLE_LUT:
value = 1;
break;
+   case I915_PARAM_CMD_PARSER_VERSION:
+   /* TODO: version info (e.g. what is allowed?) */
+   value = 1;
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 126bfaa..8a3e4ef00 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_NO_RELOC25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT   27
+#define I915_PARAM_CMD_PARSER_VERSION   28
 
 typedef struct drm_i915_getparam {
int param;
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 01/13] drm/i915: Refactor shmem pread setup

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The command parser is going to need the same synchronization and
setup logic, so factor it out for reuse.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |  3 +++
 drivers/gpu/drm/i915/i915_gem.c | 48 +
 2 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3673ba1..bfb30df 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2045,6 +2045,9 @@ void i915_gem_release_all_mmaps(struct drm_i915_private 
*dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
 
+int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
+   int *needs_clflush);
+
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object 
*obj, int n)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 39770f7..fdc1f40 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -332,6 +332,39 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
return 0;
 }
 
+/*
+ * Pins the specified object's pages and synchronizes the object with
+ * GPU accesses. Sets needs_clflush to non-zero if the caller should
+ * flush the object from the CPU cache.
+ */
+int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
+   int *needs_clflush)
+{
+   int ret;
+
+   *needs_clflush = 0;
+
+   if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
+   /* If we're not in the cpu read domain, set ourself into the gtt
+* read domain and manually flush cachelines (if required). This
+* optimizes for the case when the gpu will dirty the data
+* anyway again before the next pread happens. */
+   *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
+   obj->cache_level);
+   ret = i915_gem_object_wait_rendering(obj, true);
+   if (ret)
+   return ret;
+   }
+
+   ret = i915_gem_object_get_pages(obj);
+   if (ret)
+   return ret;
+
+   i915_gem_object_pin_pages(obj);
+
+   return ret;
+}
+
 /* Per-page copy function for the shmem pread fastpath.
  * Flushes invalid cachelines before reading the target if
  * needs_clflush is set. */
@@ -429,23 +462,10 @@ i915_gem_shmem_pread(struct drm_device *dev,
 
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 
-   if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
-   /* If we're not in the cpu read domain, set ourself into the gtt
-* read domain and manually flush cachelines (if required). This
-* optimizes for the case when the gpu will dirty the data
-* anyway again before the next pread happens. */
-   needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level);
-   ret = i915_gem_object_wait_rendering(obj, true);
-   if (ret)
-   return ret;
-   }
-
-   ret = i915_gem_object_get_pages(obj);
+   ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
if (ret)
return ret;
 
-   i915_gem_object_pin_pages(obj);
-
offset = args->offset;
 
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 03/13] drm/i915: Initial command parser table definitions

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Add command tables defining irregular length commands for each ring.
This requires a few new command opcode definitions.

OTC-Tracker: AXIA-4631
Change-Id: I064bceb457e15f46928058352afe76d918c58ef5
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 157 +
 drivers/gpu/drm/i915/i915_reg.h|  46 ++
 2 files changed, 203 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 7639dbc..2e27bad 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -27,6 +27,148 @@
 
 #include "i915_drv.h"
 
+#define STD_MI_OPCODE_MASK  0xFF800000
+#define STD_3D_OPCODE_MASK  0xFFFF0000
+#define STD_2D_OPCODE_MASK  0xFFC00000
+#define STD_MFX_OPCODE_MASK 0xFFFF0000
+
+#define CMD(op, opm, f, lm, fl, ...)   \
+   {   \
+   .flags = (fl) | (f),\
+   .cmd = { (op), (opm) }, \
+   .length = { (lm) }, \
+   __VA_ARGS__ \
+   }
+
+/* Convenience macros to compress the tables */
+#define SMI STD_MI_OPCODE_MASK
+#define S3D STD_3D_OPCODE_MASK
+#define S2D STD_2D_OPCODE_MASK
+#define SMFX STD_MFX_OPCODE_MASK
+#define F CMD_DESC_FIXED
+#define S CMD_DESC_SKIP
+#define R CMD_DESC_REJECT
+#define W CMD_DESC_REGISTER
+#define B CMD_DESC_BITMASK
+#define M CMD_DESC_MASTER
+
+/*Command  Mask   Fixed Len   Action
+ -- */
+static const struct drm_i915_cmd_descriptor common_cmds[] = {
+   CMD(  MI_NOOP,  SMI,F,  1,  S  ),
+   CMD(  MI_USER_INTERRUPT,SMI,F,  1,  S  ),
+   CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  S  ),
+   CMD(  MI_ARB_CHECK, SMI,F,  1,  S  ),
+   CMD(  MI_REPORT_HEAD,   SMI,F,  1,  S  ),
+   CMD(  MI_SUSPEND_FLUSH, SMI,F,  1,  S  ),
+   CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
+};
+
+static const struct drm_i915_cmd_descriptor render_cmds[] = {
+   CMD(  MI_FLUSH, SMI,F,  1,  S  ),
+   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  S  ),
+   CMD(  MI_PREDICATE, SMI,F,  1,  S  ),
+   CMD(  MI_TOPOLOGY_FILTER,   SMI,F,  1,  S  ),
+   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_URB_CLEAR, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  S  ),
+   CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
+   CMD(  GFX_OP_3DSTATE_VF_STATISTICS, S3D,F,  1,  S  ),
+   CMD(  PIPELINE_SELECT,  S3D,F,  1,  S  ),
+   CMD(  GPGPU_OBJECT, S3D,   !F,  0xFF,   S  ),
+   CMD(  GPGPU_WALKER, S3D,   !F,  0xFF,   S  ),
+   CMD(  GFX_OP_3DSTATE_SO_DECL_LIST,  S3D,   !F,  0x1FF,  S  ),
+};
+
+static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
+   CMD(  MI_SET_PREDICATE, SMI,F,  1,  S  ),
+   CMD(  MI_RS_CONTROL,SMI,F,  1,  S  ),
+   CMD(  MI_URB_ATOMIC_ALLOC,  SMI,F,  1,  S  ),
+   CMD(  MI_RS_CONTEXT,SMI,F,  1,  S  ),
+   CMD(  MI_LOAD_REGISTER_REG, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_RS_STORE_DATA_IMM, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_LOAD_URB_MEM,  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_STORE_URB_MEM, SMI,   !F,  0xFF,   S  ),
+   CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_VS,  S3D,   !F,  0x7FF,  S  ),
+   CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_PS,  S3D,   !F,  0x7FF,  S  ),
+
+   CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS,  S3D,   !F,  0x1FF,  S  ),
+   CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS,  S3D,   !F,  0x1FF,  S  ),
+   CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS,  S3D,   !F,  0x1FF,  S  ),
+   CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS,  S3D,   !F,  0x1FF,  S  ),
+   CMD(  

[Intel-gfx] [PATCH 02/13] drm/i915: Implement command buffer parsing logic

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The command parser scans batch buffers submitted via execbuffer ioctls before
the driver submits them to hardware. At a high level, it looks for several
things:

1) Commands which are explicitly defined as privileged or which should only be
   used by the kernel driver. The parser generally rejects such commands, with
   the provision that it may allow some from the drm master process.
2) Commands which access registers. To support correct/enhanced userspace
   functionality, particularly certain OpenGL extensions, the parser provides a
   whitelist of registers which userspace may safely access (for both normal and
   drm master processes).
3) Commands which access privileged memory (i.e. GGTT, HWS page, etc). The
   parser always rejects such commands.

Each ring maintains tables of commands and registers which the parser uses in
scanning batch buffers submitted to that ring.

The set of commands that the parser must check for is significantly smaller
than the number of commands supported, especially on the render ring. As such,
the parser tables (built up in subsequent patches) contain only those commands
required by the parser. This generally works because command opcode ranges have
standard command length encodings. So for commands that the parser does not need
to check, it can easily skip them. This is implemented via a per-ring length
decoding vfunc.

Unfortunately, there are a number of commands that do not follow the standard
length encoding for their opcode range, primarily amongst the MI_* commands. To
handle this, the parser provides a way to define explicit skip entries in the
per-ring command tables.

Other command table entries will map fairly directly to high level categories
mentioned above: rejected, master-only, register whitelist. A number of checks,
including the privileged memory checks, are implemented via a general bitmasking
mechanism.

OTC-Tracker: AXIA-4631
Change-Id: I50b98c71c6655893291c78a2d1b8954577b37a30
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/Makefile  |   3 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c | 404 +
 drivers/gpu/drm/i915/i915_drv.h|  94 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  17 ++
 drivers/gpu/drm/i915/i915_params.c |   5 +
 drivers/gpu/drm/i915/intel_ringbuffer.c|   2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h|  32 +++
 7 files changed, 556 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_cmd_parser.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 4850494..2da81bf 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -47,7 +47,8 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
  dvo_tfp410.o \
  dvo_sil164.o \
  dvo_ns2501.o \
- i915_gem_dmabuf.o
+ i915_gem_dmabuf.o \
+ i915_cmd_parser.o
 
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
 
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
new file mode 100644
index 0000000..7639dbc
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Brad Volkin bradley.d.vol...@intel.com
+ *
+ */
+
+#include "i915_drv.h"
+
+#define CLIENT_MASK      0xE0000000
+#define SUBCLIENT_MASK   0x18000000
+#define MI_CLIENT        0x00000000
+#define RC_CLIENT        0x60000000
+#define BC_CLIENT        0x40000000
+#define MEDIA_SUBCLIENT  0x10000000
+
+static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
+{
+   u32 client = cmd_header & CLIENT_MASK;
+   u32 subclient = cmd_header & SUBCLIENT_MASK;
+
+   if (client == MI_CLIENT)
+   return 0x3F;
+   else if (client == 

[Intel-gfx] [PATCH 05/13] drm/i915: Allow some privileged commands from master

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The Intel DDX uses these to implement scanline waits in the X server.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index cc2f68c..88456638 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -57,7 +57,7 @@
 static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD(  MI_NOOP,  SMI,F,  1,  S  ),
CMD(  MI_USER_INTERRUPT,SMI,F,  1,  S  ),
-   CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  R  ),
+   CMD(  MI_WAIT_FOR_EVENT,SMI,F,  1,  M  ),
CMD(  MI_ARB_CHECK, SMI,F,  1,  S  ),
CMD(  MI_REPORT_HEAD,   SMI,F,  1,  S  ),
CMD(  MI_SUSPEND_FLUSH, SMI,F,  1,  S  ),
@@ -92,7 +92,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] 
= {
CMD(  MI_RS_CONTROL,SMI,F,  1,  S  ),
CMD(  MI_URB_ATOMIC_ALLOC,  SMI,F,  1,  S  ),
CMD(  MI_RS_CONTEXT,SMI,F,  1,  S  ),
-   CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   M  ),
CMD(  MI_LOAD_SCAN_LINES_EXCL,  SMI,   !F,  0x3F,   R  ),
CMD(  MI_LOAD_REGISTER_REG, SMI,   !F,  0xFF,   R  ),
CMD(  MI_RS_STORE_DATA_IMM, SMI,   !F,  0xFF,   S  ),
@@ -137,7 +137,7 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
 };
 
 static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
-   CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   R  ),
+   CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   M  ),
CMD(  MI_LOAD_SCAN_LINES_EXCL,  SMI,   !F,  0x3F,   R  ),
 };
 
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 10/13] drm/i915: Enable PPGTT command parser checks

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Various commands that access memory have a bit to determine whether
the graphics address specified in the command should use the GGTT or
PPGTT for translation. These checks ensure that the bit indicates
PPGTT translation.

Most of these checks use the existing bit-checking infrastructure.
The PIPE_CONTROL and MI_FLUSH_DW commands, however, are multi-function
commands. The GGTT/PPGTT bit is only relevant for certain uses of the
command. As such, this change also extends the bit-checking code to
include a condition mask and offset. If the condition mask is non-zero
then the parser only performs the bit check when the bits specified by
the condition mask/offset are also non-zero.

NOTE: At this point in the series PPGTT must be enabled for the parser
to work correctly. If it's not enabled, userspace will not be setting
the PPGTT bits the way the parser requires. VLV is the only platform
where this is a problem, so at this point, we disable parsing for VLV.

OTC-Tracker: AXIA-4631
Change-Id: I3f4c76b6734f1956ec47e698230f97d0998ff92b
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 147 +
 drivers/gpu/drm/i915/i915_drv.h|   6 ++
 drivers/gpu/drm/i915/i915_reg.h|   6 ++
 3 files changed, 144 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 7de7c6a..26072a2 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -65,10 +65,22 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = 
{
CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   R  ),
CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   W,
  .reg = { .offset = 1, .mask = 0x007FFFFC }   ),
-   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   W,
- .reg = { .offset = 1, .mask = 0x007FFFFC }   ),
-   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   W,
- .reg = { .offset = 1, .mask = 0x007FFFFC }   ),
+   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   W | B,
+ .reg = { .offset = 1, .mask = 0x007FFFFC },
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
+   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   W | B,
+ .reg = { .offset = 1, .mask = 0x007FFFFC },
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
 };
 
@@ -80,9 +92,35 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = {
CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   R  ),
CMD(  MI_URB_CLEAR, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3F,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  S  ),
-   CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
+   CMD(  MI_REPORT_PERF_COUNT, SMI,   !F,  0x3F,   B,
+ .bits = {{
+   .offset = 1,
+   .mask = MI_REPORT_PERF_COUNT_GGTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
+   CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
CMD(  GFX_OP_3DSTATE_VF_STATISTICS, S3D,F,  1,  S  ),
CMD(  PIPELINE_SELECT,  S3D,F,  1,  S  ),
CMD(  MEDIA_VFE_STATE,  S3D,   !F,  

[Intel-gfx] [PATCH 11/13] drm/i915: Reject commands that would store to global HWS page

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

PIPE_CONTROL and MI_FLUSH_DW have bits that would write to the
hardware status page. The driver stores request tracking info
there, so don't let userspace overwrite it.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 30 ++
 drivers/gpu/drm/i915/i915_reg.h|  1 +
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 26072a2..b93df1c 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -141,7 +141,8 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = 
{
  },
  {
.offset = 1,
-   .mask = PIPE_CONTROL_GLOBAL_GTT_IVB,
+   .mask = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+PIPE_CONTROL_STORE_DATA_INDEX),
.expected = 0,
.condition_offset = 1,
.condition_mask = PIPE_CONTROL_POST_SYNC_OP_MASK
@@ -192,8 +193,15 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = 
{
.expected = 0,
.condition_offset = 0,
.condition_mask = MI_FLUSH_DW_OP_MASK
+ },
+ {
+   .offset = 0,
+   .mask = MI_FLUSH_DW_STORE_INDEX,
+   .expected = 0,
+   .condition_offset = 0,
+   .condition_mask = MI_FLUSH_DW_OP_MASK
  }},
- .bits_count = 2  ),
+ .bits_count = 3  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
  .bits = {{
.offset = 0,
@@ -231,8 +239,15 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
.expected = 0,
.condition_offset = 0,
.condition_mask = MI_FLUSH_DW_OP_MASK
+ },
+ {
+   .offset = 0,
+   .mask = MI_FLUSH_DW_STORE_INDEX,
+   .expected = 0,
+   .condition_offset = 0,
+   .condition_mask = MI_FLUSH_DW_OP_MASK
  }},
- .bits_count = 2  ),
+ .bits_count = 3  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
  .bits = {{
.offset = 0,
@@ -264,8 +279,15 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
.expected = 0,
.condition_offset = 0,
.condition_mask = MI_FLUSH_DW_OP_MASK
+ },
+ {
+   .offset = 0,
+   .mask = MI_FLUSH_DW_STORE_INDEX,
+   .expected = 0,
+   .condition_offset = 0,
+   .condition_mask = MI_FLUSH_DW_OP_MASK
  }},
- .bits_count = 2  ),
+ .bits_count = 3  ),
CMD(  COLOR_BLT,S2D,   !F,  0x3F,   S  ),
CMD(  SRC_COPY_BLT, S2D,   !F,  0x3F,   S  ),
 };
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ff263f4..5f77cb6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -324,6 +324,7 @@
 #define GFX_OP_PIPE_CONTROL(len)   ((0x329)|(0x327)|(0x224)|(len-2))
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB  (124) /* gen7+ */
 #define   PIPE_CONTROL_MMIO_WRITE  (123)
+#define   PIPE_CONTROL_STORE_DATA_INDEX(121)
 #define   PIPE_CONTROL_CS_STALL(120)
 #define   PIPE_CONTROL_TLB_INVALIDATE  (118)
 #define   PIPE_CONTROL_QW_WRITE(114)
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/6] tests: Add a test for the command parser

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Start with a simple testcase that should pass.

v2: Switch to I915_PARAM_CMD_PARSER_VERSION

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/.gitignore   |   1 +
 tests/Makefile.sources |   1 +
 tests/gem_exec_parse.c | 140 +
 3 files changed, 142 insertions(+)
 create mode 100644 tests/gem_exec_parse.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 7377275..f2356fb 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -35,6 +35,7 @@ gem_exec_blt
 gem_exec_faulting_reloc
 gem_exec_lut_handle
 gem_exec_nop
+gem_exec_parse
 gem_fd_exhaustion
 gem_fenced_exec_thrash
 gem_fence_thrash
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index a8c0c96..90a5322 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -29,6 +29,7 @@ TESTS_progs_M = \
gem_exec_bad_domains \
gem_exec_faulting_reloc \
gem_exec_nop \
+   gem_exec_parse \
gem_fenced_exec_thrash \
gem_fence_thrash \
gem_flink \
diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
new file mode 100644
index 000..c71e478
--- /dev/null
+++ b/tests/gem_exec_parse.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include stdlib.h
+#include stdint.h
+#include stdio.h
+#include drm.h
+#include i915_drm.h
+#include drmtest.h
+
+#ifndef I915_PARAM_CMD_PARSER_VERSION
+#define I915_PARAM_CMD_PARSER_VERSION   28
+#endif
+
+static int exec_batch_patched(int fd, uint32_t cmd_bo, uint32_t *cmds,
+ int size, int patch_offset, uint64_t 
expected_value)
+{
+   struct drm_i915_gem_execbuffer2 execbuf;
+   struct drm_i915_gem_exec_object2 objs[2];
+   struct drm_i915_gem_relocation_entry reloc[1];
+
+   uint32_t target_bo = gem_create(fd, 4096);
+   uint64_t actual_value = 0;
+
+   gem_write(fd, cmd_bo, 0, cmds, size);
+
+   reloc[0].offset = patch_offset;
+   reloc[0].delta = 0;
+   reloc[0].target_handle = target_bo;
+   reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+   reloc[0].presumed_offset = 0;
+
+   objs[0].handle = target_bo;
+   objs[0].relocation_count = 0;
+   objs[0].relocs_ptr = 0;
+   objs[0].alignment = 0;
+   objs[0].offset = 0;
+   objs[0].flags = 0;
+   objs[0].rsvd1 = 0;
+   objs[0].rsvd2 = 0;
+
+   objs[1].handle = cmd_bo;
+   objs[1].relocation_count = 1;
+   objs[1].relocs_ptr = (uintptr_t)reloc;
+   objs[1].alignment = 0;
+   objs[1].offset = 0;
+   objs[1].flags = 0;
+   objs[1].rsvd1 = 0;
+   objs[1].rsvd2 = 0;
+
+   execbuf.buffers_ptr = (uintptr_t)objs;
+   execbuf.buffer_count = 2;
+   execbuf.batch_start_offset = 0;
+   execbuf.batch_len = size;
+   execbuf.cliprects_ptr = 0;
+   execbuf.num_cliprects = 0;
+   execbuf.DR1 = 0;
+   execbuf.DR4 = 0;
+   execbuf.flags = I915_EXEC_RENDER;
+   i915_execbuffer2_set_context_id(execbuf, 0);
+   execbuf.rsvd2 = 0;
+
+   gem_execbuf(fd, execbuf);
+   gem_sync(fd, cmd_bo);
+
+   gem_read(fd,target_bo, 0, actual_value, sizeof(actual_value));
+   igt_assert(expected_value == actual_value);
+
+   gem_close(fd, target_bo);
+
+   return 1;
+}
+
+uint32_t handle;
+int fd;
+
+#define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2)
+#define   PIPE_CONTROL_QW_WRITE(114)
+
+igt_main
+{
+   igt_fixture {
+   int parser_version = 0;
+drm_i915_getparam_t gp;
+   int rc;
+
+   fd = drm_open_any();
+
+   gp.param = I915_PARAM_CMD_PARSER_VERSION;
+   gp.value = parser_version;
+   rc = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, 

[Intel-gfx] [PATCH 5/6] tests/gem_exec_parse: Test for batches w/o MI_BATCH_BUFFER_END

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index 9e90408..004c3bf 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -257,6 +257,15 @@ igt_main
  -EINVAL));
}
 
+   igt_subtest(batch-without-end) {
+   uint32_t noop[1024] = { 0 };
+   igt_assert(
+  exec_batch(fd, handle,
+ noop, sizeof(noop),
+ I915_EXEC_RENDER,
+ -EINVAL));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 4/6] tests/gem_exec_parse: Add tests for bitmask checks

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index 48fde25..9e90408 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -145,6 +145,7 @@ int fd;
 
 #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2)
 #define   PIPE_CONTROL_QW_WRITE(114)
+#define   PIPE_CONTROL_LRI_POST_OP (123)
 
 igt_main
 {
@@ -239,6 +240,23 @@ igt_main
  0));
}
 
+   igt_subtest(bitmasks) {
+   uint32_t pc[] = {
+   GFX_OP_PIPE_CONTROL,
+   (PIPE_CONTROL_QW_WRITE |
+PIPE_CONTROL_LRI_POST_OP),
+   0, // To be patched
+   0x1200,
+   0,
+   MI_BATCH_BUFFER_END,
+   };
+   igt_assert(
+  exec_batch(fd, handle,
+ pc, sizeof(pc),
+ I915_EXEC_RENDER,
+ -EINVAL));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 3/6] tests/gem_exec_parse: Add tests for register whitelist

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index ebf7116..48fde25 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -141,6 +141,7 @@ int fd;
 
 #define MI_ARB_ON_OFF (0x8  23)
 #define MI_DISPLAY_FLIP ((0x14  23) | 1)
+#define MI_LOAD_REGISTER_IMM ((0x22  23) | 1)
 
 #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2)
 #define   PIPE_CONTROL_QW_WRITE(114)
@@ -213,6 +214,31 @@ igt_main
  -EINVAL));
}
 
+   igt_subtest(registers) {
+   uint32_t lri_bad[] = {
+   MI_LOAD_REGISTER_IMM,
+   0, // disallowed register address
+   0x1200,
+   MI_BATCH_BUFFER_END,
+   };
+   uint32_t lri_ok[] = {
+   MI_LOAD_REGISTER_IMM,
+   0x5280, // allowed register address (SO_WRITE_OFFSET[0])
+   0x1,
+   MI_BATCH_BUFFER_END,
+   };
+   igt_assert(
+  exec_batch(fd, handle,
+ lri_bad, sizeof(lri_bad),
+ I915_EXEC_RENDER,
+ -EINVAL));
+   igt_assert(
+  exec_batch(fd, handle,
+ lri_ok, sizeof(lri_ok),
+ I915_EXEC_RENDER,
+ 0));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] intel: Merge i915_drm.h with cmd parser define

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 include/drm/i915_drm.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 2f4eb8c..ba863c4 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -27,7 +27,7 @@
 #ifndef _I915_DRM_H_
 #define _I915_DRM_H_
 
-#include drm.h
+#include drm/drm.h
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
@@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_NO_RELOC25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT   27
+#define I915_PARAM_CMD_PARSER_VERSION   28
 
 typedef struct drm_i915_getparam {
int param;
@@ -721,7 +722,7 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_IS_PINNED(110)
 
-/** Provide a hint to the kernel that the command stream and auxilliary
+/** Provide a hint to the kernel that the command stream and auxiliary
  * state buffers already holds the correct presumed addresses and so the
  * relocation process may be skipped if no buffers need to be moved in
  * preparation for the execbuffer.
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/6] tests/gem_exec_parse: Add tests for rejected commands

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 81 ++
 1 file changed, 81 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index c71e478..ebf7116 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -93,9 +93,55 @@ static int exec_batch_patched(int fd, uint32_t cmd_bo, 
uint32_t *cmds,
return 1;
 }
 
+static int exec_batch(int fd, uint32_t cmd_bo, uint32_t *cmds,
+ int size, int ring, int expected_ret)
+{
+   struct drm_i915_gem_execbuffer2 execbuf;
+   struct drm_i915_gem_exec_object2 objs[1];
+   int ret;
+
+   gem_write(fd, cmd_bo, 0, cmds, size);
+
+   objs[0].handle = cmd_bo;
+   objs[0].relocation_count = 0;
+   objs[0].relocs_ptr = 0;
+   objs[0].alignment = 0;
+   objs[0].offset = 0;
+   objs[0].flags = 0;
+   objs[0].rsvd1 = 0;
+   objs[0].rsvd2 = 0;
+
+   execbuf.buffers_ptr = (uintptr_t)objs;
+   execbuf.buffer_count = 1;
+   execbuf.batch_start_offset = 0;
+   execbuf.batch_len = size;
+   execbuf.cliprects_ptr = 0;
+   execbuf.num_cliprects = 0;
+   execbuf.DR1 = 0;
+   execbuf.DR4 = 0;
+   execbuf.flags = ring;
+   i915_execbuffer2_set_context_id(execbuf, 0);
+   execbuf.rsvd2 = 0;
+
+   ret = drmIoctl(fd,
+  DRM_IOCTL_I915_GEM_EXECBUFFER2,
+  execbuf);
+   if (ret == 0)
+   igt_assert(expected_ret == 0);
+   else
+   igt_assert(-errno == expected_ret);
+
+   gem_sync(fd, cmd_bo);
+
+   return 1;
+}
+
 uint32_t handle;
 int fd;
 
+#define MI_ARB_ON_OFF (0x8  23)
+#define MI_DISPLAY_FLIP ((0x14  23) | 1)
+
 #define GFX_OP_PIPE_CONTROL((0x329)|(0x327)|(0x224)|2)
 #define   PIPE_CONTROL_QW_WRITE(114)
 
@@ -132,6 +178,41 @@ igt_main
   0x1200));
}
 
+   igt_subtest(basic-rejected) {
+   uint32_t arb_on_off[] = {
+   MI_ARB_ON_OFF,
+   MI_BATCH_BUFFER_END,
+   };
+   uint32_t display_flip[] = {
+   MI_DISPLAY_FLIP,
+   0, 0, 0,
+   MI_BATCH_BUFFER_END,
+   0
+   };
+   igt_assert(
+  exec_batch(fd, handle,
+ arb_on_off, sizeof(arb_on_off),
+ I915_EXEC_RENDER,
+ -EINVAL));
+   igt_assert(
+  exec_batch(fd, handle,
+ arb_on_off, sizeof(arb_on_off),
+ I915_EXEC_BSD,
+ -EINVAL));
+   if (gem_has_vebox(fd)) {
+   igt_assert(
+  exec_batch(fd, handle,
+ arb_on_off, sizeof(arb_on_off),
+ I915_EXEC_VEBOX,
+ -EINVAL));
+   }
+   igt_assert(
+  exec_batch(fd, handle,
+ display_flip, sizeof(display_flip),
+ I915_EXEC_BLT,
+ -EINVAL));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 08/13] drm/i915: Enable register whitelist checks

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

MI_STORE_REGISTER_MEM, MI_LOAD_REGISTER_MEM, and MI_LOAD_REGISTER_IMM
commands allow userspace access to registers. Only certain registers
should be allowed for such access, so enable checking for those commands.
Each ring gets its own register whitelist.

MI_LOAD_REGISTER_REG on HSW also allows register access but is currently
unused by userspace components. Leave it rejected.

PIPE_CONTROL and MEDIA_VFE_STATE allow register access based on certain
bits being set. Reject those as well.

OTC-Tracker: AXIA-4631
Change-Id: Ie614a2f0eb2e5917de809e5a17957175d24cc44f
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 23 ---
 drivers/gpu/drm/i915/i915_reg.h|  3 +++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 296e322..5d3e303 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -63,9 +63,12 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD(  MI_SUSPEND_FLUSH, SMI,F,  1,  S  ),
CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   R  ),
CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   W,
+ .reg = { .offset = 1, .mask = 0x007C }   ),
+   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   W,
+ .reg = { .offset = 1, .mask = 0x007C }   ),
+   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   W,
+ .reg = { .offset = 1, .mask = 0x007C }   ),
CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
 };
 
@@ -82,9 +85,23 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = {
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   S  ),
CMD(  GFX_OP_3DSTATE_VF_STATISTICS, S3D,F,  1,  S  ),
CMD(  PIPELINE_SELECT,  S3D,F,  1,  S  ),
+   CMD(  MEDIA_VFE_STATE,  S3D,   !F,  0x, B,
+ .bits = {{
+   .offset = 2,
+   .mask = MEDIA_VFE_STATE_MMIO_ACCESS_MASK,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
CMD(  GPGPU_OBJECT, S3D,   !F,  0xFF,   S  ),
CMD(  GPGPU_WALKER, S3D,   !F,  0xFF,   S  ),
CMD(  GFX_OP_3DSTATE_SO_DECL_LIST,  S3D,   !F,  0x1FF,  S  ),
+   CMD(  GFX_OP_PIPE_CONTROL(5),   S3D,   !F,  0xFF,   B,
+ .bits = {{
+   .offset = 1,
+   .mask = PIPE_CONTROL_MMIO_WRITE,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
 };
 
 static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b99bacf..6592d0d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -319,6 +319,7 @@
 #define   DISPLAY_PLANE_B   (120)
 #define GFX_OP_PIPE_CONTROL(len)   ((0x329)|(0x327)|(0x224)|(len-2))
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB  (124) /* gen7+ */
+#define   PIPE_CONTROL_MMIO_WRITE  (123)
 #define   PIPE_CONTROL_CS_STALL(120)
 #define   PIPE_CONTROL_TLB_INVALIDATE  (118)
 #define   PIPE_CONTROL_QW_WRITE(114)
@@ -359,6 +360,8 @@
 
 #define PIPELINE_SELECT
((0x329)|(0x127)|(0x124)|(0x416))
 #define GFX_OP_3DSTATE_VF_STATISTICS   
((0x329)|(0x127)|(0x024)|(0xB16))
+#define MEDIA_VFE_STATE
((0x329)|(0x227)|(0x024)|(0x016))
+#define  MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
 #define GPGPU_OBJECT   
((0x329)|(0x227)|(0x124)|(0x416))
 #define GPGPU_WALKER   
((0x329)|(0x227)|(0x124)|(0x516))
 #define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 6/6] tests/gem_exec_parse: Test a command crossing a page boundary

2014-01-29 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

This is a speculative test in that it's not particularly relevant
today, but is important if we switch the parser implementation to
use kmap_atomic instead of vmap.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 tests/gem_exec_parse.c | 68 ++
 1 file changed, 68 insertions(+)

diff --git a/tests/gem_exec_parse.c b/tests/gem_exec_parse.c
index 004c3bf..455bfbf 100644
--- a/tests/gem_exec_parse.c
+++ b/tests/gem_exec_parse.c
@@ -136,6 +136,60 @@ static int exec_batch(int fd, uint32_t cmd_bo, uint32_t 
*cmds,
return 1;
 }
 
+static int exec_split_batch(int fd, uint32_t *cmds,
+   int size, int ring, int expected_ret)
+{
+   struct drm_i915_gem_execbuffer2 execbuf;
+   struct drm_i915_gem_exec_object2 objs[1];
+   uint32_t cmd_bo;
+   uint32_t noop[1024] = { 0 };
+   int ret;
+
+   // Allocate and fill a 2-page batch with noops
+   cmd_bo = gem_create(fd, 4096 * 2);
+   gem_write(fd, cmd_bo, 0, noop, sizeof(noop));
+   gem_write(fd, cmd_bo, 4096, noop, sizeof(noop));
+
+   // Write the provided commands such that the first dword
+   // of the command buffer is the last dword of the first
+   // page (i.e. the command is split across the two pages).
+   gem_write(fd, cmd_bo, 4096-sizeof(uint32_t), cmds, size);
+
+   objs[0].handle = cmd_bo;
+   objs[0].relocation_count = 0;
+   objs[0].relocs_ptr = 0;
+   objs[0].alignment = 0;
+   objs[0].offset = 0;
+   objs[0].flags = 0;
+   objs[0].rsvd1 = 0;
+   objs[0].rsvd2 = 0;
+
+   execbuf.buffers_ptr = (uintptr_t)objs;
+   execbuf.buffer_count = 1;
+   execbuf.batch_start_offset = 0;
+   execbuf.batch_len = size;
+   execbuf.cliprects_ptr = 0;
+   execbuf.num_cliprects = 0;
+   execbuf.DR1 = 0;
+   execbuf.DR4 = 0;
+   execbuf.flags = ring;
+   i915_execbuffer2_set_context_id(execbuf, 0);
+   execbuf.rsvd2 = 0;
+
+   ret = drmIoctl(fd,
+  DRM_IOCTL_I915_GEM_EXECBUFFER2,
+  execbuf);
+   if (ret == 0)
+   igt_assert(expected_ret == 0);
+   else
+   igt_assert(-errno == expected_ret);
+
+   gem_sync(fd, cmd_bo);
+   gem_close(fd, cmd_bo);
+
+   return 1;
+}
+
 uint32_t handle;
 int fd;
 
@@ -266,6 +320,20 @@ igt_main
  -EINVAL));
}
 
+   igt_subtest(cmd-crossing-page) {
+   uint32_t lri_ok[] = {
+   MI_LOAD_REGISTER_IMM,
+   0x5280, // allowed register address (SO_WRITE_OFFSET[0])
+   0x1,
+   MI_BATCH_BUFFER_END,
+   };
+   igt_assert(
+  exec_split_batch(fd,
+   lri_ok, sizeof(lri_ok),
+   I915_EXEC_RENDER,
+   0));
+   }
+
igt_fixture {
gem_close(fd, handle);
 
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 01/22] drm/i915: Add data structures for command parser

2013-11-26 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

The command parser needs to know a few things about certain commands
in order to process them correctly. Add structures for storing that
information.

OTC-Tracker: AXIA-4631
Change-Id: I50b98c71c6655893291c78a2d1b8954577b37a30
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h | 51 +
 1 file changed, 51 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 14f250a..ff1e201 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1731,6 +1731,57 @@ struct drm_i915_file_private {
atomic_t rps_wait_boost;
 };
 
+/**
+ * A command that requires special handling by the command parser.
+ */
+struct drm_i915_cmd_descriptor {
+   /**
+* Flags describing how the command parser processes the command.
+*
+* CMD_DESC_FIXED: The command has a fixed length if this is set,
+* a length mask if not set
+* CMD_DESC_SKIP: The command is allowed but does not follow the
+*standard length encoding for the opcode range in
+*which it falls
+*/
+   u32 flags;
+#define CMD_DESC_FIXED (10)
+#define CMD_DESC_SKIP  (11)
+
+   /**
+* The command's unique identification bits and the bitmask to get them.
+* This isn't strictly the opcode field as defined in the spec and may
+* also include type, subtype, and/or subop fields.
+*/
+   struct {
+   u32 value;
+   u32 mask;
+   } cmd;
+
+   /**
+* The command's length. The command is either fixed length (i.e. does
+* not include a length field) or has a length field mask. The flag
+* CMD_DESC_FIXED indicates a fixed length. Otherwise, the command has
+* a length mask. All command entries in a command table must include
+* length information.
+*/
+   union {
+   u32 fixed;
+   u32 mask;
+   } length;
+};
+
+/**
+ * A table of commands requiring special handling by the command parser.
+ *
+ * Each ring has an array of tables. Each table consists of an array of command
+ * descriptors, which must be sorted with command opcodes in ascending order.
+ */
+struct drm_i915_cmd_table {
+   const struct drm_i915_cmd_descriptor *table;
+   int count;
+};
+
 #define INTEL_INFO(dev)(to_i915(dev)-info)
 
 #define IS_I830(dev)   ((dev)-pdev-device == 0x3577)
-- 
1.8.4.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 04/22] drm/i915: Add per-ring command length decode functions

2013-11-26 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

For commands that aren't in the parser's tables, we get the length
based on standard per-ring command encodings for specific opcode ranges.

These functions just return the bitmask and the parser will extract the
actual length value.

OTC-Tracker: AXIA-4631
Change-Id: I2729d4483931cb4aea9403fd43710c4d4e8e5e89
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c  | 62 +
 drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++
 2 files changed, 74 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 014e661..247d530 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -137,6 +137,62 @@ static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
{ blt_cmds, ARRAY_SIZE(blt_cmds) },
 };
 
+#define CLIENT_MASK  0xE000
+#define SUBCLIENT_MASK   0x1800
+#define MI_CLIENT0x
+#define RC_CLIENT0x6000
+#define BC_CLIENT0x4000
+#define MEDIA_SUBCLIENT  0x1000
+
+static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
+{
+   u32 client = cmd_header  CLIENT_MASK;
+   u32 subclient = cmd_header  SUBCLIENT_MASK;
+
+   if (client == MI_CLIENT)
+   return 0x3F;
+   else if (client == RC_CLIENT) {
+   if (subclient == MEDIA_SUBCLIENT)
+   return 0x;
+   else
+   return 0xFF;
+   }
+
+   DRM_DEBUG_DRIVER(CMD: Abnormal rcs cmd length! 0x%08X\n, cmd_header);
+   return 0;
+}
+
+static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header)
+{
+   u32 client = cmd_header  CLIENT_MASK;
+   u32 subclient = cmd_header  SUBCLIENT_MASK;
+
+   if (client == MI_CLIENT)
+   return 0x3F;
+   else if (client == RC_CLIENT) {
+   if (subclient == MEDIA_SUBCLIENT)
+   return 0xFFF;
+   else
+   return 0xFF;
+   }
+
+   DRM_DEBUG_DRIVER(CMD: Abnormal bsd cmd length! 0x%08X\n, cmd_header);
+   return 0;
+}
+
+static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
+{
+   u32 client = cmd_header  CLIENT_MASK;
+
+   if (client == MI_CLIENT)
+   return 0x3F;
+   else if (client == BC_CLIENT)
+   return 0xFF;
+
+   DRM_DEBUG_DRIVER(CMD: Abnormal blt cmd length! 0x%08X\n, cmd_header);
+   return 0;
+}
+
 void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring)
 {
if (!IS_GEN7(ring-dev))
@@ -152,18 +208,24 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer 
*ring)
ring-cmd_tables = gen7_render_cmds;
ring-cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
}
+
+   ring-get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VCS:
ring-cmd_tables = gen7_video_cmds;
ring-cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
+   ring-get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
case BCS:
ring-cmd_tables = gen7_blt_cmds;
ring-cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
+   ring-get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VECS:
ring-cmd_tables = hsw_vebox_cmds;
ring-cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
+   /* VECS can use the same length_mask function as VCS */
+   ring-get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
default:
DRM_DEBUG(CMD: cmd_parser_init with unknown ring: %d\n,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 67305d3..8e71b59 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -169,6 +169,18 @@ struct  intel_ring_buffer {
 */
const struct drm_i915_cmd_table *cmd_tables;
int cmd_table_count;
+
+   /**
+* Returns the bitmask for the length field of the specified command.
+* Return 0 for an unrecognized/invalid command.
+*
+* If the command parser finds an entry for a command in the ring's
+* cmd_tables, it gets the command's length based on the table entry.
+* If not, it calls this function to determine the per-ring length field
+* encoding for the command (i.e. certain opcode ranges use certain bits
+* to encode the command length in the header).
+*/
+   u32 (*get_cmd_length_mask)(u32 cmd_header);
 };
 
 static inline bool
-- 
1.8.4.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 15/22] drm/i915: Reject commands that would store to global HWS page

2013-11-26 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

PIPE_CONTROL and MI_FLUSH_DW have bits that would write to the
hardware status page. There are no users of this today and it
seems unsafe.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 30 ++
 drivers/gpu/drm/i915/i915_reg.h|  1 +
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 7b30a03..f32dc69 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -131,7 +131,8 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = 
{
  },
  {
.offset = 1,
-   .mask = PIPE_CONTROL_GLOBAL_GTT_IVB,
+   .mask = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+PIPE_CONTROL_STORE_DATA_INDEX),
.expected = 0,
.condition_offset = 1,
.condition_mask = PIPE_CONTROL_POST_SYNC_OP_MASK
@@ -167,8 +168,15 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = 
{
.expected = 0,
.condition_offset = 0,
.condition_mask = MI_FLUSH_DW_OP_MASK
+ },
+ {
+   .offset = 0,
+   .mask = MI_FLUSH_DW_STORE_INDEX,
+   .expected = 0,
+   .condition_offset = 0,
+   .condition_mask = MI_FLUSH_DW_OP_MASK
  }},
- .bits_count = 2  ),
+ .bits_count = 3  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
  .bits = {{
.offset = 0,
@@ -192,8 +200,15 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
.expected = 0,
.condition_offset = 0,
.condition_mask = MI_FLUSH_DW_OP_MASK
+ },
+ {
+   .offset = 0,
+   .mask = MI_FLUSH_DW_STORE_INDEX,
+   .expected = 0,
+   .condition_offset = 0,
+   .condition_mask = MI_FLUSH_DW_OP_MASK
  }},
- .bits_count = 2  ),
+ .bits_count = 3  ),
CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
  .bits = {{
.offset = 0,
@@ -217,8 +232,15 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
.expected = 0,
.condition_offset = 0,
.condition_mask = MI_FLUSH_DW_OP_MASK
+ },
+ {
+   .offset = 0,
+   .mask = MI_FLUSH_DW_STORE_INDEX,
+   .expected = 0,
+   .condition_offset = 0,
+   .condition_mask = MI_FLUSH_DW_OP_MASK
  }},
- .bits_count = 2  ),
+ .bits_count = 3  ),
CMD(  COLOR_BLT,S2D,   !F,  0x3F,   S  ),
CMD(  SRC_COPY_BLT, S2D,   !F,  0x3F,   S  ),
 };
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3f64d41..919d1a6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -323,6 +323,7 @@
 #define GFX_OP_PIPE_CONTROL(len)   ((0x329)|(0x327)|(0x224)|(len-2))
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB  (124) /* gen7+ */
 #define   PIPE_CONTROL_MMIO_WRITE  (123)
+#define   PIPE_CONTROL_STORE_DATA_INDEX(121)
 #define   PIPE_CONTROL_CS_STALL(120)
 #define   PIPE_CONTROL_TLB_INVALIDATE  (118)
 #define   PIPE_CONTROL_QW_WRITE(114)
-- 
1.8.4.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 18/22] drm/i915: Reject MI_ARB_ON_OFF on VECS

2013-11-26 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index c8426af..5593740 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -197,6 +197,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = {
 };
 
 static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
+   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
CMD(  MI_FLUSH_DW,  SMI,   !F,  0x3F,   B,
  .bits = {{
.offset = 0,
-- 
1.8.4.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 06/22] drm/i915: Add a HAS_CMD_PARSER getparam

2013-11-26 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

So userspace can query the kernel for command parser support.

OTC-Tracker: AXIA-4631
Change-Id: I58af650db9f6753c2dcac9c54ab432fd31db302f
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c | 3 +++
 include/uapi/drm/i915_drm.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5aeb103..f0a4638 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1003,6 +1003,9 @@ static int i915_getparam(struct drm_device *dev, void 
*data,
case I915_PARAM_HAS_EXEC_HANDLE_LUT:
value = 1;
break;
+   case I915_PARAM_HAS_CMD_PARSER:
+   value = 1;
+   break;
default:
DRM_DEBUG(Unknown parameter %d\n, param-param);
return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 52aed89..48cc277 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_NO_RELOC25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT   27
+#define I915_PARAM_HAS_CMD_PARSER   28
 
 typedef struct drm_i915_getparam {
int param;
-- 
1.8.4.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 09/22] drm/i915: Add support for rejecting commands via bitmasks

2013-11-26 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

A variety of checks we want to do amount to verifying that a given
bit or bits are set/clear in a given dword of a command. For now,
allow a small but arbitrary number of bitmasks for each command.

OTC-Tracker: AXIA-4631
Change-Id: Icc77316c243b6e218774c15e2c090cc470d59317
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 22 ++
 drivers/gpu/drm/i915/i915_drv.h| 16 
 2 files changed, 38 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 2dbca01..99d15f3 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -400,6 +400,28 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
}
}
 
+   if (desc->flags & CMD_DESC_BITMASK) {
+   int i;
+
+   for (i = 0; i < desc->bits_count; i++) {
+   u32 dword = cmd[desc->bits[i].offset] &
+   desc->bits[i].mask;
+
+   if (dword != desc->bits[i].expected) {
+   DRM_DEBUG_DRIVER("CMD: Rejected command 
0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n",
+*cmd,
+desc->bits[i].mask,
+desc->bits[i].expected,
+dword, ring->id);
+   ret = -EINVAL;
+   break;
+   }
+   }
+
+   if (ret)
+   break;
+   }
+
cmd += length;
}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 83b6031..f31fc68 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1752,6 +1752,7 @@ struct drm_i915_cmd_descriptor {
 #define CMD_DESC_SKIP (1<<1)
 #define CMD_DESC_REJECT   (1<<2)
 #define CMD_DESC_REGISTER (1<<3)
+#define CMD_DESC_BITMASK  (1<<4)
 
/**
 * The command's unique identification bits and the bitmask to get them.
@@ -1784,6 +1785,21 @@ struct drm_i915_cmd_descriptor {
u32 offset;
u32 mask;
} reg;
+
+#define MAX_CMD_DESC_BITMASKS 3
+   /**
+* Describes command checks where a particular dword is masked and
+* compared against an expected value. If the command does not match
+* the expected value, the parser rejects it. Only valid if flags has
+* the CMD_DESC_BITMASK bit set.
+*/
+   struct {
+   u32 offset;
+   u32 mask;
+   u32 expected;
+   } bits[MAX_CMD_DESC_BITMASKS];
+   /** Number of valid entries in the bits array */
+   int bits_count;
 };
 
 /**
-- 
1.8.4.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 14/22] drm/i915: Enable PPGTT command parser checks

2013-11-26 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Various commands that access memory have a bit to determine whether
the graphics address specified in the command should use the GGTT or
PPGTT for translation. These checks ensure that the bit indicates
PPGTT translation.

Most of these checks use the existing bit-checking infrastructure.
The PIPE_CONTROL and MI_FLUSH_DW commands, however, are multi-function
commands. The GGTT/PPGTT bit is only relevant for certain uses of the
command. As such, this change also extends the bit-checking code to
include a condition mask and offset. If the condition mask is non-zero
then the parser only performs the bit check when the bits specified by
the condition mask/offset are also non-zero.

NOTE: At this point in the series PPGTT must be enabled for the parser
to work correctly. If it's not enabled, userspace will not be setting
the PPGTT bits the way the parser requires. There's a WARN_ON to detect
this case.

OTC-Tracker: AXIA-4631
Change-Id: I3f4c76b6734f1956ec47e698230f97d0998ff92b
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 110 ++---
 drivers/gpu/drm/i915/i915_drv.h|   6 ++
 drivers/gpu/drm/i915/i915_reg.h|   5 ++
 3 files changed, 111 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index b881d39..7b30a03 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -61,15 +61,33 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = 
{
CMD(  MI_REPORT_HEAD,   SMI,F,  1,  S  ),
CMD(  MI_SUSPEND_FLUSH, SMI,F,  1,  S  ),
CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3FF,  S  ),
+   CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3FF,  B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   R  ),
CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   W,
  .reg = { .offset = 1, .mask = 0x007FFFFC }   ),
CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   R  ),
-   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   W,
- .reg = { .offset = 1, .mask = 0x007FFFFC }   ),
-   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   W,
- .reg = { .offset = 1, .mask = 0x007FFFFC }   ),
+   CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   W | B,
+ .reg = { .offset = 1, .mask = 0x007FFFFC },
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
+   CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   W | B,
+ .reg = { .offset = 1, .mask = 0x007FFFFC },
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
 };
 
@@ -79,7 +97,20 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = {
CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
CMD(  MI_PREDICATE, SMI,F,  1,  S  ),
CMD(  MI_TOPOLOGY_FILTER,   SMI,F,  1,  S  ),
-   CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  S  ),
+   CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
+   CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
+ .bits = {{
+   .offset = 0,
+   .mask = MI_GLOBAL_GTT,
+   .expected = 0
+ }},
+ .bits_count = 1  ),
CMD(  GFX_OP_3DSTATE_VF_STATISTICS, S3D,F,  1,  S  ),
CMD(  PIPELINE_SELECT,  S3D,F,  1,  S  ),
CMD(  MEDIA_VFE_STATE,  S3D,   !F,  0xFFFF, B,
@@ -97,8 +128,15 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = 
{
.offset = 1,

[Intel-gfx] [RFC 07/22] drm/i915: Add support for rejecting commands during parsing

2013-11-26 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

Certain commands are always disallowed from userspace. This adds
the ability for the command parser to detect such commands and
reject batch buffers containing them.

OTC-Tracker: AXIA-4631
Change-Id: I000b0df4d441ec80b607a50d35e83418cdfd38b3
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 6 ++
 drivers/gpu/drm/i915/i915_drv.h| 6 --
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index b01628e..c64f640 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -368,6 +368,12 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
break;
}
 
+   if (desc->flags & CMD_DESC_REJECT) {
+   DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", 
*cmd);
+   ret = -EINVAL;
+   break;
+   }
+
cmd += length;
}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 81ef047..6ace856 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1743,10 +1743,12 @@ struct drm_i915_cmd_descriptor {
 * CMD_DESC_SKIP: The command is allowed but does not follow the
 *standard length encoding for the opcode range in
 *which it falls
+* CMD_DESC_REJECT: The command is never allowed
 */
u32 flags;
-#define CMD_DESC_FIXED (1<<0)
-#define CMD_DESC_SKIP  (1<<1)
+#define CMD_DESC_FIXED  (1<<0)
+#define CMD_DESC_SKIP   (1<<1)
+#define CMD_DESC_REJECT (1<<2)
 
/**
 * The command's unique identification bits and the bitmask to get them.
-- 
1.8.4.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 10/22] drm/i915: Reject unsafe commands

2013-11-26 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

These commands allow userspace to affect global state.

OTC-Tracker: AXIA-4631
Change-Id: I80a22c9cd83181790d2a9064e70ea09326691b66
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 99d15f3..8ee4cda 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -47,6 +47,7 @@
 #define SMFX STD_MFX_OPCODE_MASK
 #define F CMD_DESC_FIXED
 #define S CMD_DESC_SKIP
+#define R CMD_DESC_REJECT
 
 /*Command  Mask   Fixed Len   Action
  -- */
@@ -57,10 +58,11 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = 
{
CMD(  MI_ARB_CHECK, SMI,F,  1,  S  ),
CMD(  MI_REPORT_HEAD,   SMI,F,  1,  S  ),
CMD(  MI_SUSPEND_FLUSH, SMI,F,  1,  S  ),
-   CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_SEMAPHORE_MBOX,SMI,   !F,  0xFF,   R  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3FF,  S  ),
-   CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_STORE_DWORD_INDEX, SMI,   !F,  0xFF,   R  ),
CMD(  MI_LOAD_REGISTER_IMM(1),  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_UPDATE_GTT,SMI,   !F,  0xFF,   R  ),
CMD(  MI_STORE_REGISTER_MEM(1), SMI,   !F,  0xFF,   S  ),
CMD(  MI_LOAD_REGISTER_MEM, SMI,   !F,  0xFF,   S  ),
CMD(  MI_BATCH_BUFFER_START,SMI,   !F,  0xFF,   S  ),
@@ -68,8 +70,8 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
 
 static const struct drm_i915_cmd_descriptor render_cmds[] = {
CMD(  MI_FLUSH, SMI,F,  1,  S  ),
-   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  S  ),
-   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
+   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
CMD(  MI_PREDICATE, SMI,F,  1,  S  ),
CMD(  MI_TOPOLOGY_FILTER,   SMI,F,  1,  S  ),
CMD(  MI_CLFLUSH,   SMI,   !F,  0x3FF,  S  ),
@@ -94,12 +96,12 @@ static const struct drm_i915_cmd_descriptor 
hsw_render_cmds[] = {
 };
 
 static const struct drm_i915_cmd_descriptor video_cmds[] = {
-   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  S  ),
+   CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
CMD(  MFX_WAIT, SMFX,  !F,  0x3F,   S  ),
 };
 
 static const struct drm_i915_cmd_descriptor blt_cmds[] = {
-   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   S  ),
+   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
CMD(  COLOR_BLT,S2D,   !F,  0x3F,   S  ),
CMD(  SRC_COPY_BLT, S2D,   !F,  0x3F,   S  ),
 };
@@ -111,6 +113,7 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
 #undef SMFX
 #undef F
 #undef S
+#undef R
 
 static const struct drm_i915_cmd_table gen7_render_cmds[] = {
{ common_cmds, ARRAY_SIZE(common_cmds) },
-- 
1.8.4.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 22/22] drm/i915: Enable command parsing by default

2013-11-26 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

OTC-Tracker: AXIA-4631
Change-Id: I6747457e1fe7494bd42787af51198fcba398ad78
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 90d7db0..8c0d91b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -154,10 +154,10 @@ module_param_named(prefault_disable, 
i915_prefault_disable, bool, 0600);
 MODULE_PARM_DESC(prefault_disable,
"Disable page prefaulting for pread/pwrite/reloc 
(default:false). For developers only.");
 
-int i915_enable_cmd_parser __read_mostly = 0;
+int i915_enable_cmd_parser __read_mostly = 1;
 module_param_named(enable_cmd_parser, i915_enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
-   "Enable command parsing (default: false)");
+   "Enable command parsing (default: true)");
 
 static struct drm_driver driver;
 
-- 
1.8.4.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


  1   2   >