Re: [Mesa-dev] [PATCH mesa v2] vulkan/wsi: fix assignment in assert()

2018-07-26 Thread Lionel Landwerlin

On 25/07/18 20:03, Eric Engestrom wrote:

CovID: 1438113, 1438118, 1438119, 1438121
Fixes: dc1d10b396179766227df "anv,radv: Add support for 
VK_KHR_get_display_properties2"
Cc: Jason Ekstrand 
Signed-off-by: Eric Engestrom 


Reviewed-by: Lionel Landwerlin 


---
  src/vulkan/wsi/wsi_common_display.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/vulkan/wsi/wsi_common_display.c 
b/src/vulkan/wsi/wsi_common_display.c
index ac932d4368a0293fe97e..e6cba188dfaf8d790415 100644
--- a/src/vulkan/wsi/wsi_common_display.c
+++ b/src/vulkan/wsi/wsi_common_display.c
@@ -619,7 +619,7 @@ wsi_display_fill_in_display_mode_properties(
 struct wsi_display_mode *display_mode,
 VkDisplayModeProperties2KHR *properties)
  {
-   assert(properties->sType = VK_STRUCTURE_TYPE_DISPLAY_MODE_PROPERTIES_2_KHR);
+   assert(properties->sType == 
VK_STRUCTURE_TYPE_DISPLAY_MODE_PROPERTIES_2_KHR);
 VkDisplayModePropertiesKHR *prop = &properties->displayModeProperties;
  
 prop->displayMode = wsi_display_mode_to_handle(display_mode);

@@ -763,7 +763,7 @@ wsi_get_display_plane_capabilities2(
 const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo,
 VkDisplayPlaneCapabilities2KHR *capabilities)
  {
-   assert(capabilities->sType =
+   assert(capabilities->sType ==
VK_STRUCTURE_TYPE_DISPLAY_PLANE_CAPABILITIES_2_KHR);
  
 return wsi_get_display_plane_capabilities(physical_device, wsi_device,



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa] vulkan/wsi: fix assignment in assert()

2018-07-26 Thread Lionel Landwerlin

On 25/07/18 19:54, Eric Engestrom wrote:

CovID: 1438121
Fixes: dc1d10b396179766227df "anv,radv: Add support for 
VK_KHR_get_display_properties2"
Cc: Jason Ekstrand 
Signed-off-by: Eric Engestrom 


Reviewed-by: Lionel Landwerlin 


---
  src/vulkan/wsi/wsi_common_display.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/vulkan/wsi/wsi_common_display.c 
b/src/vulkan/wsi/wsi_common_display.c
index ac932d4368a0293fe97e..7cabb7780db29c8a44d7 100644
--- a/src/vulkan/wsi/wsi_common_display.c
+++ b/src/vulkan/wsi/wsi_common_display.c
@@ -763,7 +763,7 @@ wsi_get_display_plane_capabilities2(
 const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo,
 VkDisplayPlaneCapabilities2KHR *capabilities)
  {
-   assert(capabilities->sType =
+   assert(capabilities->sType ==
VK_STRUCTURE_TYPE_DISPLAY_PLANE_CAPABILITIES_2_KHR);
  
 return wsi_get_display_plane_capabilities(physical_device, wsi_device,



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa] anv: don't crash on vkDestroyDevice(NULL)

2018-07-26 Thread Lionel Landwerlin

On 25/07/18 19:45, Eric Engestrom wrote:

CovID: 1438132
Signed-off-by: Eric Engestrom 


Reviewed-by: Lionel Landwerlin 


---
  src/intel/vulkan/anv_device.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 04fd6a829ed60081abc4..3664f80c24dc34955196 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1832,11 +1832,13 @@ void anv_DestroyDevice(
  const VkAllocationCallbacks*pAllocator)
  {
 ANV_FROM_HANDLE(anv_device, device, _device);
-   struct anv_physical_device *physical_device = 
&device->instance->physicalDevice;
+   struct anv_physical_device *physical_device;
  
 if (!device)

return;
  
+   physical_device = &device->instance->physicalDevice;

+
 anv_device_finish_blorp(device);
  
 anv_pipeline_cache_finish(&device->default_pipeline_cache);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] intel: Make the decoder handle STATE_BASE_ADDRESS not being a buffer.

2018-07-25 Thread Lionel Landwerlin

Hey Ken,

Looks all good to me.

Just one suggestion which I think might matter in your prototype :

The handle_state_base_address() function tries to get BOs when the 
instruction is parsed.
But as you seem to imply those base addresses might not be backed by 
actual buffers.

I would turn the gen_batch_decode_bo in gen_batch_decode_ctx into uint64_t.
We always seem to use bo.addr but not the map, so no need to bother 
getting those.


With or without that changed :

Reviewed-by: Lionel Landwerlin 

Thanks!

On 24/07/18 22:55, Kenneth Graunke wrote:

Normally, i965 programs STATE_BASE_ADDRESS every batch, and puts all
state for a given base in a single buffer.

I'm working on a prototype which emits STATE_BASE_ADDRESS only once at
startup, where each base address is a fixed 4GB region of the PPGTT.
State may live in many buffers in that 4GB region, even if there isn't
a buffer located at the actual base address itself.

To handle this, we need to save the STATE_BASE_ADDRESS values across
multiple batches, rather than assuming we'll see the command each time.
Then, each time we see a pointer, we need to ask the driver for the BO
map for that data.  (We can't just use the map for the base address, as
state may be in multiple buffers, and there may not even be a buffer
at the base address to map.)

v2: Fix things caught in review by Lionel:
  - Drop bogus bind_bo.size check.
  - Drop "get the BOs again" code - we just get the BOs as needed
  - Add a message about interface descriptor data being unavailable
---
  src/intel/common/gen_batch_decoder.c | 75 +++-
  src/intel/common/gen_decoder.h   |  9 +++-
  2 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index 727cbb80cfb..c6967ebc053 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -128,13 +128,13 @@ static void
  ctx_disassemble_program(struct gen_batch_decode_ctx *ctx,
  uint32_t ksp, const char *type)
  {
-   if (!ctx->instruction_base.map)
+   uint64_t addr = ctx->instruction_base.addr + ksp;
+   struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr);
+   if (!bo.map)
return;
  
-   printf("\nReferenced %s:\n", type);

-   gen_disasm_disassemble(ctx->disasm,
-  (void *)ctx->instruction_base.map, ksp,
-  ctx->fp);
+   fprintf(ctx->fp, "\nReferenced %s:\n", type);
+   gen_disasm_disassemble(ctx->disasm, bo.map, 0, ctx->fp);
  }
  
  /* Heuristic to determine whether a uint32_t is probably actually a float

@@ -225,35 +225,30 @@ dump_binding_table(struct gen_batch_decode_ctx *ctx, 
uint32_t offset, int count)
 if (count < 0)
count = update_count(ctx, offset, 1, 8);
  
-   if (ctx->surface_base.map == NULL) {

-  fprintf(ctx->fp, "  binding table unavailable\n");
+   if (offset % 32 != 0 || offset >= UINT16_MAX) {
+  fprintf(ctx->fp, "  invalid binding table pointer\n");
return;
 }
  
-   if (offset % 32 != 0 || offset >= UINT16_MAX ||

-   offset >= ctx->surface_base.size) {
-  fprintf(ctx->fp, "  invalid binding table pointer\n");
+   struct gen_batch_decode_bo bind_bo =
+  ctx_get_bo(ctx, ctx->surface_base.addr + offset);
+
+   if (bind_bo.map == NULL) {
+  fprintf(ctx->fp, "  binding table unavailable\n");
return;
 }
  
-   struct gen_batch_decode_bo bo = ctx->surface_base;

-   const uint32_t *pointers = ctx->surface_base.map + offset;
+   const uint32_t *pointers = bind_bo.map;
 for (int i = 0; i < count; i++) {
if (pointers[i] == 0)
   continue;
  
-  if (pointers[i] % 32 != 0) {

- fprintf(ctx->fp, "pointer %u: %08x \n", i, pointers[i]);
- continue;
-  }
-
uint64_t addr = ctx->surface_base.addr + pointers[i];
+  struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr);
uint32_t size = strct->dw_length * 4;
  
-  if (addr < bo.addr || addr + size >= bo.addr + bo.size)

- bo = ctx->get_bo(ctx->user_data, addr);
-
-  if (addr < bo.addr || addr + size >= bo.addr + bo.size) {
+  if (pointers[i] % 32 != 0 ||
+  addr < bo.addr || addr + size >= bo.addr + bo.size) {
   fprintf(ctx->fp, "pointer %u: %08x \n", i, pointers[i]);
   continue;
}
@@ -271,18 +266,20 @@ dump_samplers(struct gen_batch_decode_ctx *ctx, uint32_t 
offset, int count)
 if (count < 0)
count = update_count(ctx, offset, strct->dw_length, 4);
  
-   if (ctx->dynamic_base.map == NULL) {

+   uint64_t state_addr = ctx->dynamic_base.addr + offset;
+   struct gen_batch_decode_bo bo = ctx_get_bo(ctx, state_addr);
+   const void *state_map = bo.map;
+
+   if (state_map 

[Mesa-dev] [PATCH] intel: tools: dump: only store device id on success

2018-07-24 Thread Lionel Landwerlin
We might fail on master node drm fd because we won't have the right
permissions.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 1abe54147cf..a71103f1889 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -111,7 +111,7 @@ align_u32(uint32_t v, uint32_t a)
 }
 
 static struct gen_device_info devinfo = {0};
-static uint32_t device;
+static uint32_t device = 0;
 static struct aub_file aub_file;
 
 static void *
@@ -419,7 +419,7 @@ ioctl(int fd, unsigned long request, ...)
   * (they typically do), we'll piggy-back on
   * their ioctl and store the id for later
   * use. */
- if (getparam->param == I915_PARAM_CHIPSET_ID)
+ if (ret == 0 && getparam->param == I915_PARAM_CHIPSET_ID)
 device = *getparam->value;
 
  return ret;
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir/serialize: Alloc constants off the variable

2018-07-24 Thread Lionel Landwerlin

On 24/07/18 19:02, Jason Ekstrand wrote:

nir_sweep assumes that constants are always allocated off the variable
to which they belong.  Violating this assumption causes them to get
freed early and leads to use-after-free bugs.

Cc: mesa-sta...@lists.freedesktop.org


Reviewed-by: Lionel Landwerlin 


---
  src/compiler/nir/nir_serialize.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index 6a30738c2d7..43016310048 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -124,7 +124,7 @@ read_constant(read_ctx *ctx, nir_variable *nvar)
  
 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));

 c->num_elements = blob_read_uint32(ctx->blob);
-   c->elements = ralloc_array(ctx->nir, nir_constant *, c->num_elements);
+   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
 for (unsigned i = 0; i < c->num_elements; i++)
c->elements[i] = read_constant(ctx, nvar);
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] intel/ppgtt: memory address alignment

2018-07-24 Thread Lionel Landwerlin

On 24/07/18 18:34, Kenneth Graunke wrote:

On Tuesday, July 24, 2018 5:34:57 AM PDT Lionel Landwerlin wrote:

That looks correct to me (and we do the same in Anv).
Also a bit baffled that we haven't run into issues earlier :(

But would be good to have Ken's Rb too.

Thanks a lot!

Reviewed-by: Lionel Landwerlin 

Yeah, this is probably for the best...we used to just ask the kernel
for memory and it would do this for us.  Now that we're doing it
ourselves, we ought to be defensive here.

Reviewed-by: Kenneth Graunke 

But I agree with Chris, I'm surprised that this would actually fix
anything...all of our allocations ought to be multiples of PAGE_SIZE,
so unless we're starting at a funny address, they ought to remain
that way...

I wonder if this isn't papering over another bug.


Sergii,

If you can reproduce this bug locally, would you mind adding

 assert(size % 4096 == 0);

at the top of vma_alloc() and see if it hits the asserts.
Having a backtrace would be great :)

Thanks a lot,

-
Lionel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Context aware user space EU control through application

2018-07-24 Thread Lionel Landwerlin

On 24/07/18 17:41, Marathe, Yogesh wrote:

Lionel, Chris,


-Original Message-
From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On Behalf
Of Lionel Landwerlin
Sent: Friday, July 20, 2018 3:31 PM
To: Chris Wilson ; Muthukumar, Aravindan
; mesa-dev@lists.freedesktop.org
Subject: Re: [Mesa-dev] [PATCH 2/2] i965: Context aware user space EU control
through application

On 20/07/18 09:47, Chris Wilson wrote:

Quoting aravindan.muthuku...@intel.com (2018-07-20 09:32:57)

From: "Muthukumar, Aravindan" 

   The Patch here is to give control to user/ application to really
   decide what's the max GPU load it would put. If that can be
   known in advance, rpcs can be programmed accordingly.
   This solution has changes across i915,
   drm and mesa (not limited only to kernel).

   Here, we pass gpu_load_type = {high, medium, low} from application
   while context is created. Default here is 'High' and applications
   roughly know if they are going to eat up entire GPU. The typical
   usecase of 'Low' is idle screen or minor mouse movements. Users can
   read meaning of high/medium/low for their platform  & then program
   contexts accordingly. Here gpu_load_type directly translates to
   number of shader cores/EUs a particular GPU has.

   Signed-off-by: Aravindan Muthukumar 
   Signed-off-by: Kedar J Karanje 
   Signed-off-by: Praveen Diwakar 
   Signed-off-by: Yogesh Marathe 
+/* Dynamic Eu control */
+struct drm_i915_load_type {
+   __u32 ctx_id;
+   __u32 load_type;
+};
+
+/* DYNAMIC EU CONTROL */
+int
+brw_hw_context_load_type(struct brw_bufmgr *bufmgr,
+uint32_t ctx_id,
+int load_type) {
+   struct drm_i915_load_type type = {
+   .ctx_id = ctx_id,
+   .load_type = load_type,
+   };
+   int err;
+
+   err = 0;
+   if(drmIoctl(bufmgr->fd, DRM_IOCTL_I915_LOAD_TYPE, &type))
+   err = -errno;

This went through 4 people and none noticed that there already exists
a means to set per-context parameters. And it's even used right next
to this function.

The word hint needs to be firmly embedded around here.
-Chris
__

Yep,

Looks like you want to get involved in this discussion :
https://patchwork.freedesktop.org/series/42285/

I understand this is exposing per context eu config through debugfs. That mostly
(if not fully) matches the  kernel part of what we wanted to achieve. We have
additional code in kernel where we categorize based on load type and fix
a config per platform. For sure the kernel parts can be merged but the proposal
is different here and it's specific to adding this capability through mesa.

Here we are enabling applications to decide load while creating the context and
making it simple for application programmers by abstracting it.  Also in these 
kernel
patches, it seems to be exposing the parameters to user space, are we 
discussing
its user space counterpart in mesa or in some other component? If not, I feel 
this
is a bit different. Can it be a mesa extension? Then any app / process can do this
without having privilege (root).


Yes, I was just pointing out that you might want to reuse existing 
patches for i915 so that we don't end up with 2 similar interfaces there.

Obviously your extension would still be needed.

-
Lionel




-
Lionel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] intel/ppgtt: memory address alignment

2018-07-24 Thread Lionel Landwerlin

On 24/07/18 13:42, Chris Wilson wrote:

Quoting Lionel Landwerlin (2018-07-24 13:34:57)

That looks correct to me (and we do the same in Anv).
Also a bit baffled that we haven't run into issues earlier :(

All the allocations should be in multiples of page size, alignment less
than a page size should be a no-op. Tracking down who doesn't think
IS_ALIGNED(bo->size, PAGE_SIZE) would be interesting.
-Chris


Buckets?

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] intel/ppgtt: memory address alignment

2018-07-24 Thread Lionel Landwerlin

That looks correct to me (and we do the same in Anv).
Also a bit baffled that we haven't run into issues earlier :(

But would be good to have Ken's Rb too.

Thanks a lot!

Reviewed-by: Lionel Landwerlin 

On 24/07/18 12:50, Sergii Romantsov wrote:

Kernel (for ppgtt) requires memory address to be
aligned to page size (4096).
Added such alignment for buffers marked with EXEC_OBJECT_PINNED.

-v2: added marking that also fixes initial commit 01058a552294

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106997
Fixes: a363bb2cd0e2 (i965: Allocate VMA in userspace for full-PPGTT systems.)
Fixes: 01058a552294 (i965: Add virtual memory allocator infrastructure to 
brw_bufmgr.)
Signed-off-by: Sergii Romantsov 
---
  src/mesa/drivers/dri/i965/brw_bufmgr.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 09d45e3..8383735 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -643,7 +643,7 @@ retry:
 bo->kflags = bufmgr->initial_kflags;
  
 if ((bo->kflags & EXEC_OBJECT_PINNED) && bo->gtt_offset == 0ull) {

-  bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1);
+  bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 4096);
  
if (bo->gtt_offset == 0ull)

   goto err_free;
@@ -784,7 +784,7 @@ brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
 bo->kflags = bufmgr->initial_kflags;
  
 if (bo->kflags & EXEC_OBJECT_PINNED)

-  bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 1);
+  bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 4096);
  
 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

 _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
@@ -1424,7 +1424,7 @@ brw_bo_gem_create_from_prime_internal(struct brw_bufmgr 
*bufmgr, int prime_fd,
  
 if (bo->kflags & EXEC_OBJECT_PINNED) {

assert(bo->size > 0);
-  bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 1);
+  bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 4096);
 }
  
 if (tiling_mode < 0) {



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] RFC : Context aware user space Resource control

2018-07-24 Thread Lionel Landwerlin

On 23/07/18 10:20, Kedar J Karanje wrote:

On Fri, Jul 20, 2018 at 10:59:03AM +0100, Lionel Landwerlin wrote:

On 20/07/18 09:32, aravindan.muthuku...@intel.com wrote:

diff --git a/src/egl/generate/egl.xml b/src/egl/generate/egl.xml
index 9250f93..52b0c9f 100644
--- a/src/egl/generate/egl.xml
+++ b/src/egl/generate/egl.xml
@@ -460,6 +460,14 @@
  
  
+
+
+
+
+
+
+
+
  
  
  

Are you defining a new extension here?
The chunk above seems to imply this is from IMG, but still commented as
reserved.

As far as I understand enums need to be allocated through Khronos group
before you can make use of them.
Otherwise this will be unusable by applications as soon as another extension
reuses the same enums...

Hello Lionel,
   Thank you for the comments, yes we used IMG extension as a template and were
   wondering how the xml is modified , when we found the procedure for proposing
   an extension : https://www.khronos.org/registry/OpenGL/docs/rules.html ,
   just wondering how does it work for RFC ,
   Do we need to get through the Khronos process first then send for RFC or
   is the other way round ok ? I shall dig into the mail archives for any
   existing extension and how it came into being.

Thanks,
Kedar


Hey Kedar,

I think usually there is some amount of discussion internally about 
whether the feature makes sense.

Then it's published through Khronos and finally the implementation can land.

-
Lionel


Thanks,

-
Lionel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/2] i965, anv: Add extra unused character in disk_cache renderer temp string

2018-07-24 Thread Lionel Landwerlin

On 23/07/18 05:27, Jordan Justen wrote:

This extra character should not be used by snprintf, but we make it
available to verify that we printed the exact number we wanted, and
didn't overflow.

v2:
  * Also update Anvil

Signed-off-by: Jordan Justen 
Reviewed-by: Lionel Landwerlin  (v1)


For the series v2 :

Reviewed-by: Lionel Landwerlin 


---
  src/intel/vulkan/anv_device.c  | 4 ++--
  src/mesa/drivers/dri/i965/brw_disk_cache.c | 5 +++--
  2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 04fd6a829ed..247ba641336 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -278,10 +278,10 @@ static void
  anv_physical_device_init_disk_cache(struct anv_physical_device *device)
  {
  #ifdef ENABLE_SHADER_CACHE
-   char renderer[9];
+   char renderer[10];
 MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
 device->chipset_id);
-   assert(len == sizeof(renderer) - 1);
+   assert(len == sizeof(renderer) - 2);
  
 char timestamp[41];

 _mesa_sha1_format(timestamp, device->driver_build_sha1);
diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c 
b/src/mesa/drivers/dri/i965/brw_disk_cache.c
index c478753d4ad..a678c355b9d 100644
--- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
@@ -377,10 +377,11 @@ void
  brw_disk_cache_init(struct intel_screen *screen)
  {
  #ifdef ENABLE_SHADER_CACHE
-   char renderer[10];
+   /* array length: print length + null char + 1 extra to verify it is unused 
*/
+   char renderer[11];
 MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "i965_%04x",
 screen->deviceID);
-   assert(len == sizeof(renderer) - 1);
+   assert(len == sizeof(renderer) - 2);
  
 const struct build_id_note *note =

build_id_find_nhdr_for_addr(brw_disk_cache_init);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: Account for built-in uniforms in analyze_ubo_ranges

2018-07-23 Thread Lionel Landwerlin

On 23/07/18 18:46, Jason Ekstrand wrote:

The original pass only looked for load_uniform intrinsics but there are
a number of other places that could end up loading a push constant.  One
obvious omission was images which always implicitly use a push constant.
Legacy VS clip planes also get pushed into the shader.

Cc: mesa-sta...@lists.freedesktop.org
Cc: Kenneth Graunke 


Tested-by: Lionel Landwerlin 


---
  src/intel/compiler/brw_nir.h  |  1 +
  .../compiler/brw_nir_analyze_ubo_ranges.c | 41 +--
  src/intel/vulkan/anv_pipeline.c   |  2 +-
  src/mesa/drivers/dri/i965/brw_gs.c|  2 +-
  src/mesa/drivers/dri/i965/brw_tcs.c   |  2 +-
  src/mesa/drivers/dri/i965/brw_tes.c   |  2 +-
  src/mesa/drivers/dri/i965/brw_vs.c|  2 +-
  src/mesa/drivers/dri/i965/brw_wm.c|  2 +-
  8 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 19442b47eae..7d82edafe46 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -148,6 +148,7 @@ void brw_nir_lower_patch_vertices_in_to_uniform(nir_shader 
*nir);
  
  void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,

  nir_shader *nir,
+const struct brw_vs_prog_key *vs_key,
  struct brw_ubo_range out_ranges[4]);
  
  bool brw_nir_opt_peephole_ffma(nir_shader *shader);

diff --git a/src/intel/compiler/brw_nir_analyze_ubo_ranges.c 
b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
index cd5137da06e..cfa531675fc 100644
--- a/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
+++ b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
@@ -124,12 +124,29 @@ analyze_ubos_block(struct ubo_analysis_state *state, 
nir_block *block)
   continue;
  
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

-  if (intrin->intrinsic == nir_intrinsic_load_uniform)
+  switch (intrin->intrinsic) {
+  case nir_intrinsic_load_uniform:
+  case nir_intrinsic_image_deref_load:
+  case nir_intrinsic_image_deref_store:
+  case nir_intrinsic_image_deref_atomic_add:
+  case nir_intrinsic_image_deref_atomic_min:
+  case nir_intrinsic_image_deref_atomic_max:
+  case nir_intrinsic_image_deref_atomic_and:
+  case nir_intrinsic_image_deref_atomic_or:
+  case nir_intrinsic_image_deref_atomic_xor:
+  case nir_intrinsic_image_deref_atomic_exchange:
+  case nir_intrinsic_image_deref_atomic_comp_swap:
+  case nir_intrinsic_image_deref_size:
   state->uses_regular_uniforms = true;
-
-  if (intrin->intrinsic != nir_intrinsic_load_ubo)
   continue;
  
+  case nir_intrinsic_load_ubo:

+ break; /* Fall through to the analysis below */
+
+  default:
+ continue; /* Not a uniform or UBO intrinsic */
+  }
+
nir_const_value *block_const = nir_src_as_const_value(intrin->src[0]);
nir_const_value *offset_const = nir_src_as_const_value(intrin->src[1]);
  
@@ -179,6 +196,7 @@ print_ubo_entry(FILE *file,

  void
  brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
 nir_shader *nir,
+   const struct brw_vs_prog_key *vs_key,
 struct brw_ubo_range out_ranges[4])
  {
 const struct gen_device_info *devinfo = compiler->devinfo;
@@ -197,6 +215,23 @@ brw_nir_analyze_ubo_ranges(const struct brw_compiler 
*compiler,
   _mesa_hash_table_create(mem_ctx, NULL, _mesa_key_pointer_equal),
 };
  
+   switch (nir->info.stage) {

+   case MESA_SHADER_VERTEX:
+  if (vs_key && vs_key->nr_userclip_plane_consts > 0)
+ state.uses_regular_uniforms = true;
+  break;
+
+   case MESA_SHADER_COMPUTE:
+  /* Compute shaders use push constants to get the subgroup ID so it's
+   * best to just assume some system values are pushed.
+   */
+  state.uses_regular_uniforms = true;
+  break;
+
+   default:
+  break;
+   }
+
 /* Walk the IR, recording how many times each UBO block/offset is used. */
 nir_foreach_function(function, nir) {
if (function->impl) {
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 211cee788b8..1e6bd12b87d 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -472,7 +472,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data, map);
  
 if (stage != MESA_SHADER_COMPUTE)

-  brw_nir_analyze_ubo_ranges(compiler, nir, prog_data->ubo_ranges);
+  brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
  
 assert(nir->num_uniforms == prog_data->nr_params * 4);
  
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri

Re: [Mesa-dev] [PATCH v2] anv: Pay attention to VK_ACCESS_MEMORY_(READ|WRITE)_BIT

2018-07-23 Thread Lionel Landwerlin

Thanks Alex!

Reviewed-by: Lionel Landwerlin 

On 23/07/18 09:51, Alex Smith wrote:

According to the spec, these should apply to all read/write access
types (so would be equivalent to specifying all other access types
individually). Currently, they were doing nothing.

v2: Handle VK_ACCESS_MEMORY_WRITE_BIT in dstAccessMask.

Signed-off-by: Alex Smith 
Cc: mesa-sta...@lists.freedesktop.org
---
  src/intel/vulkan/anv_private.h | 9 +
  1 file changed, 9 insertions(+)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index cec2842792..1660fcbbc8 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1731,6 +1731,9 @@ anv_pipe_flush_bits_for_access_flags(VkAccessFlags flags)
   pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
   pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
   break;
+  case VK_ACCESS_MEMORY_WRITE_BIT:
+ pipe_bits |= ANV_PIPE_FLUSH_BITS;
+ break;
default:
   break; /* Nothing to do */
}
@@ -1761,6 +1764,12 @@ anv_pipe_invalidate_bits_for_access_flags(VkAccessFlags 
flags)
case VK_ACCESS_TRANSFER_READ_BIT:
   pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
   break;
+  case VK_ACCESS_MEMORY_READ_BIT:
+ pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
+ break;
+  case VK_ACCESS_MEMORY_WRITE_BIT:
+ pipe_bits |= ANV_PIPE_FLUSH_BITS;
+ break;
default:
   break; /* Nothing to do */
}



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/blorp: Fix blits to R8G8B8_UNORM_SRGB sRGB harder

2018-07-23 Thread Lionel Landwerlin

On 22/07/18 22:45, Jason Ekstrand wrote:
On Sun, Jul 22, 2018 at 1:23 PM Lionel Landwerlin 
mailto:lionel.g.landwer...@intel.com>> 
wrote:


On 21/07/18 07:12, Jason Ekstrand wrote:
> The first fix attempt contained a nasty typo which somehow
didn't get
> caught in review.  It also didn't work as intended because the sRGB
> conversion was happening but then throwing away all but the red
channel
> because it didn't know it was RGB.  Really, it's my fault for
trying to
> fix a bug without first writing tests.  I've now written tests
and they
> pass with this change. :)
>
> Fixes: 7adb1986808c "intel/blorp: Fix blits to R8G8B8_UNORM_SRGB"

I couldn't find this hash. Are you referring to commit
11712b9ca17e4e1a819dcb7d020e19c6da77bc90 ?


Yes, that one.  Fixed locally.

Otherwise looks good.


Can I take that as a review?


Yes!


-
Lionel

> ---
>   src/intel/blorp/blorp_blit.c | 14 +++---
>   1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/src/intel/blorp/blorp_blit.c
b/src/intel/blorp/blorp_blit.c
> index 3b4f42a59b6..561897894c3 100644
> --- a/src/intel/blorp/blorp_blit.c
> +++ b/src/intel/blorp/blorp_blit.c
> @@ -993,6 +993,8 @@ convert_color(struct nir_builder *b,
nir_ssa_def *color,
>         value = nir_fmul(b, nir_fsat(b, nir_channel(b, color, 0)),
>                             nir_imm_float(b, factor));
>      } else if (key->dst_format == ISL_FORMAT_L8_UNORM_SRGB) {
> +      value = nir_format_linear_to_srgb(b, nir_channel(b,
color, 0));
> +   } else if (key->dst_format == ISL_FORMAT_R8G8B8_UNORM_SRGB) {
>         value = nir_format_linear_to_srgb(b, color);
>      } else if (key->dst_format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) {
>         value = nir_format_pack_r9g9b9e5(b, color);
> @@ -1000,8 +1002,14 @@ convert_color(struct nir_builder *b,
nir_ssa_def *color,
>         unreachable("Unsupported format conversion");
>      }
>
> -   nir_ssa_def *u = nir_ssa_undef(b, 1, 32);
> -   return nir_vec4(b, value, u, u, u);
> +   nir_ssa_def *out_comps[4];
> +   for (unsigned i = 0; i < 4; i++) {
> +      if (i < value->num_components)
> +         out_comps[i] = nir_channel(b, value, i);
> +      else
> +         out_comps[i] = nir_ssa_undef(b, 1, 32);
> +   }
> +   return nir_vec(b, out_comps, 4);
>   }
>
>   /**
> @@ -1959,7 +1967,7 @@ try_blorp_blit(struct blorp_batch *batch,
>
>         /* If it happens to be sRGB, we need to force a
conversion */
>         if (params->dst.view.format == ISL_FORMAT_R8G8B8_UNORM_SRGB)
> -         wm_prog_key->dst_format = ISL_FORMAT_R9G9B9E5_SHAREDEXP;
> +         wm_prog_key->dst_format = ISL_FORMAT_R8G8B8_UNORM_SRGB;
>
>  surf_fake_rgb_with_red(batch->blorp->isl_dev, >dst);
>




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/disk_cache: Add in the INTEL_DEBUG value to the renderer string

2018-07-22 Thread Lionel Landwerlin

Nice catch!

Reviewed-by: Lionel Landwerlin 

On 22/07/18 05:45, Jordan Justen wrote:

Since various options within INTEL_DEBUG could impact code generation,
we should add this into the renderer string so changing the
INTEL_DEBUG setting will cause the shader cache to work properly.

An example that will affect the program generated by i965 is the
INTEL_DEBUG=nocompact option.

Adding in the entire 64-bits of INTEL_DEBUG is overkill. Many
INTEL_DEBUG options won't actually impact the program generated by the
driver. Nevertheless, it is more maintainable and safer to just add
the entire 64-bit value.

Cc: Timothy Arceri 
Signed-off-by: Jordan Justen 
---
  src/mesa/drivers/dri/i965/brw_disk_cache.c | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c 
b/src/mesa/drivers/dri/i965/brw_disk_cache.c
index a678c355b9d..6e50476f812 100644
--- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
@@ -378,9 +378,10 @@ brw_disk_cache_init(struct intel_screen *screen)
  {
  #ifdef ENABLE_SHADER_CACHE
 /* array length: print length + null char + 1 extra to verify it is unused 
*/
-   char renderer[11];
-   MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "i965_%04x",
-   screen->deviceID);
+   char renderer[28];
+   MAYBE_UNUSED int len =
+  snprintf(renderer, sizeof(renderer), "i965_%04x_%016" PRIx64,
+   screen->deviceID, INTEL_DEBUG);
 assert(len == sizeof(renderer) - 2);
  
 const struct build_id_note *note =



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965/disk_cache: Add extra unused character in renderer temp string

2018-07-22 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 22/07/18 05:45, Jordan Justen wrote:

This extra character should not be used by snprintf, but we make it
available to verify that we printed the exact number we wanted, and
didn't overflow.

Signed-off-by: Jordan Justen 
---
  src/mesa/drivers/dri/i965/brw_disk_cache.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c 
b/src/mesa/drivers/dri/i965/brw_disk_cache.c
index c478753d4ad..a678c355b9d 100644
--- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
@@ -377,10 +377,11 @@ void
  brw_disk_cache_init(struct intel_screen *screen)
  {
  #ifdef ENABLE_SHADER_CACHE
-   char renderer[10];
+   /* array length: print length + null char + 1 extra to verify it is unused 
*/
+   char renderer[11];
 MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "i965_%04x",
 screen->deviceID);
-   assert(len == sizeof(renderer) - 1);
+   assert(len == sizeof(renderer) - 2);
  
 const struct build_id_note *note =

build_id_find_nhdr_for_addr(brw_disk_cache_init);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Properly handle GetImageSubresourceLayout on complex images

2018-07-22 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 20/07/18 22:51, Jason Ekstrand wrote:

We support mipmapped and arrayed linear images so we need to support
vkGetImageSubresourceLayout on them.  Fortunately, it's just a trivial
call into ISL.
---
  src/intel/vulkan/anv_image.c | 23 ---
  1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index b6a1ac8a4d4..e039e4b7a76 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -763,17 +763,26 @@ void anv_GetImageSubresourceLayout(
  
 assert(__builtin_popcount(subresource->aspectMask) == 1);
  
-   /* If we are on a non-zero mip level or array slice, we need to

-* calculate a real offset.
-*/
-   anv_assert(subresource->mipLevel == 0);
-   anv_assert(subresource->arrayLayer == 0);
-
 layout->offset = surface->offset;
 layout->rowPitch = surface->isl.row_pitch;
 layout->depthPitch = isl_surf_get_array_pitch(>isl);
 layout->arrayPitch = isl_surf_get_array_pitch(>isl);
-   layout->size = surface->isl.size;
+
+   if (subresource->mipLevel > 0 || subresource->arrayLayer > 0) {
+  assert(surface->isl.tiling == ISL_TILING_LINEAR);
+
+  uint32_t offset_B;
+  isl_surf_get_image_offset_B_tile_sa(>isl,
+  subresource->mipLevel,
+  subresource->arrayLayer,
+  0 /* logical_z_offset_px */,
+  _B, NULL, NULL);
+  layout->offset += offset_B;
+  layout->size = layout->rowPitch * anv_minify(image->extent.height,
+   subresource->mipLevel);
+   } else {
+  layout->size = surface->isl.size;
+   }
  }
  
  /**



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/blorp: Fix blits to R8G8B8_UNORM_SRGB sRGB harder

2018-07-22 Thread Lionel Landwerlin

On 21/07/18 07:12, Jason Ekstrand wrote:

The first fix attempt contained a nasty typo which somehow didn't get
caught in review.  It also didn't work as intended because the sRGB
conversion was happening but then throwing away all but the red channel
because it didn't know it was RGB.  Really, it's my fault for trying to
fix a bug without first writing tests.  I've now written tests and they
pass with this change. :)

Fixes: 7adb1986808c "intel/blorp: Fix blits to R8G8B8_UNORM_SRGB"


I couldn't find this hash. Are you referring to commit 
11712b9ca17e4e1a819dcb7d020e19c6da77bc90 ?


Otherwise looks good.

-
Lionel


---
  src/intel/blorp/blorp_blit.c | 14 +++---
  1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 3b4f42a59b6..561897894c3 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -993,6 +993,8 @@ convert_color(struct nir_builder *b, nir_ssa_def *color,
value = nir_fmul(b, nir_fsat(b, nir_channel(b, color, 0)),
nir_imm_float(b, factor));
 } else if (key->dst_format == ISL_FORMAT_L8_UNORM_SRGB) {
+  value = nir_format_linear_to_srgb(b, nir_channel(b, color, 0));
+   } else if (key->dst_format == ISL_FORMAT_R8G8B8_UNORM_SRGB) {
value = nir_format_linear_to_srgb(b, color);
 } else if (key->dst_format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) {
value = nir_format_pack_r9g9b9e5(b, color);
@@ -1000,8 +1002,14 @@ convert_color(struct nir_builder *b, nir_ssa_def *color,
unreachable("Unsupported format conversion");
 }
  
-   nir_ssa_def *u = nir_ssa_undef(b, 1, 32);

-   return nir_vec4(b, value, u, u, u);
+   nir_ssa_def *out_comps[4];
+   for (unsigned i = 0; i < 4; i++) {
+  if (i < value->num_components)
+ out_comps[i] = nir_channel(b, value, i);
+  else
+ out_comps[i] = nir_ssa_undef(b, 1, 32);
+   }
+   return nir_vec(b, out_comps, 4);
  }
  
  /**

@@ -1959,7 +1967,7 @@ try_blorp_blit(struct blorp_batch *batch,
  
/* If it happens to be sRGB, we need to force a conversion */

if (params->dst.view.format == ISL_FORMAT_R8G8B8_UNORM_SRGB)
- wm_prog_key->dst_format = ISL_FORMAT_R9G9B9E5_SHAREDEXP;
+ wm_prog_key->dst_format = ISL_FORMAT_R8G8B8_UNORM_SRGB;
  
surf_fake_rgb_with_red(batch->blorp->isl_dev, >dst);
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] spirv: add support for SPV_KHR_post_depth_coverage

2018-07-20 Thread Lionel Landwerlin

Thanks for the changes, this series is :

Reviewed-by: Lionel Landwerlin 

On 20/07/18 22:50, Yunchao He wrote:

From: Ilia Mirkin 

Allow the capability to be exposed, and convert the new execution mode
into fs state.

Signed-off-by: Ilia Mirkin 
---
  src/compiler/shader_info.h| 1 +
  src/compiler/spirv/spirv_to_nir.c | 9 +
  2 files changed, 10 insertions(+)

diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index 3b95d5962c..e9ca222dea 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -59,6 +59,7 @@ struct spirv_supported_capabilities {
 bool stencil_export;
 bool atomic_storage;
 bool storage_8bit;
+   bool post_depth_coverage;
  };
  
  typedef struct shader_info {

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index b92197b39b..7ac51a0107 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -3542,6 +3542,10 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, 
SpvOp opcode,
   spv_check_supported(stencil_export, cap);
   break;
  
+  case SpvCapabilitySampleMaskPostDepthCoverage:

+ spv_check_supported(post_depth_coverage, cap);
+ break;
+
default:
   vtn_fail("Unhandled capability");
}
@@ -3609,6 +3613,11 @@ vtn_handle_execution_mode(struct vtn_builder *b, struct 
vtn_value *entry_point,
b->shader->info.fs.early_fragment_tests = true;
break;
  
+   case SpvExecutionModePostDepthCoverage:

+  vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
+  b->shader->info.fs.post_depth_coverage = true;
+  break;
+
 case SpvExecutionModeInvocations:
vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
b->shader->info.gs.invocations = MAX2(1, mode->literals[0]);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Add support for VK_EXT_post_depth_coverage extension

2018-07-20 Thread Lionel Landwerlin

Hi there,

Since your version doesn't add very much to Ilia's series, it would be 
nice to retain the initial authorship as well as the 2 patches (separate 
generic infrastructure from driver enabling).
Jason also had a comment about the coverage variable that could be moved 
into the next hunk.


Thanks a lot,

-
Lionel

On 20/07/18 20:46, Yunchao He wrote:

The original patch was from Ilia Mirkin. See the link at
https://patchwork.freedesktop.org/series/30443/. This one
rebased that patch, and it can pass the deqp cts right now.
---
  src/compiler/shader_info.h |  1 +
  src/compiler/spirv/spirv_to_nir.c  |  9 +
  src/intel/vulkan/anv_extensions.py |  1 +
  src/intel/vulkan/anv_pipeline.c|  1 +
  src/intel/vulkan/genX_pipeline.c   | 13 +++--
  5 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index 3b95d5962c..e9ca222dea 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -59,6 +59,7 @@ struct spirv_supported_capabilities {
 bool stencil_export;
 bool atomic_storage;
 bool storage_8bit;
+   bool post_depth_coverage;
  };
  
  typedef struct shader_info {

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index b92197b39b..7ac51a0107 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -3542,6 +3542,10 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, 
SpvOp opcode,
   spv_check_supported(stencil_export, cap);
   break;
  
+  case SpvCapabilitySampleMaskPostDepthCoverage:

+ spv_check_supported(post_depth_coverage, cap);
+ break;
+
default:
   vtn_fail("Unhandled capability");
}
@@ -3609,6 +3613,11 @@ vtn_handle_execution_mode(struct vtn_builder *b, struct 
vtn_value *entry_point,
b->shader->info.fs.early_fragment_tests = true;
break;
  
+   case SpvExecutionModePostDepthCoverage:

+  vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
+  b->shader->info.fs.post_depth_coverage = true;
+  break;
+
 case SpvExecutionModeInvocations:
vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
b->shader->info.gs.invocations = MAX2(1, mode->literals[0]);
diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index adc1d75898..ea837744b4 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -124,6 +124,7 @@ EXTENSIONS = [
  Extension('VK_EXT_shader_viewport_index_layer',   1, True),
  Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen 
>= 9'),
  Extension('VK_EXT_vertex_attribute_divisor',  2, True),
+Extension('VK_EXT_post_depth_coverage',   1, 'device->info.gen 
>= 9'),
  ]
  
  class VkVersion:

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 95a686f783..e91c146aad 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -155,6 +155,7 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
   .subgroup_vote = true,
   .stencil_export = device->instance->physicalDevice.info.gen >= 9,
   .storage_8bit = device->instance->physicalDevice.info.gen >= 8,
+ .post_depth_coverage = device->instance->physicalDevice.info.gen >= 9,
},
 };
  
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c

index 0821d71c9f..4db1e27a3a 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1565,6 +1565,16 @@ emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
return;
 }
  
+#if GEN_GEN >= 9

+   uint32_t coverage = ICMS_NONE;
+   if (wm_prog_data->uses_sample_mask) {
+  if (wm_prog_data->post_depth_coverage)
+ coverage = ICMS_DEPTH_COVERAGE;
+  else
+ coverage = ICMS_INNER_CONSERVATIVE;
+   }
+#endif
+
 anv_batch_emit(>batch, GENX(3DSTATE_PS_EXTRA), ps) {
ps.PixelShaderValid  = true;
ps.AttributeEnable   = wm_prog_data->num_varying_inputs > 0;
@@ -1617,8 +1627,7 @@ emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
  #if GEN_GEN >= 9
ps.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
ps.PixelShaderPullsBary= wm_prog_data->pulls_bary;
-  ps.InputCoverageMaskState  = wm_prog_data->uses_sample_mask ?
-   ICMS_INNER_CONSERVATIVE : ICMS_NONE;
+  ps.InputCoverageMaskState  = coverage;
  #else
ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
  #endif



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] intel: tools: dump: protect against multiple calls on destructor

2018-07-20 Thread Lionel Landwerlin
When running gdb, make sure to pass the LD_PRELOAD variable only to
the executed program, not the debugger. Otherwise the debugger will
run the preloaded constructor/destructor too and bad things will
happen.

Suggested-by: Rafael Antognolli 
Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.in | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.in 
b/src/intel/tools/intel_dump_gpu.in
index 0454cff25da..aa187ba8614 100755
--- a/src/intel/tools/intel_dump_gpu.in
+++ b/src/intel/tools/intel_dump_gpu.in
@@ -23,8 +23,10 @@ EOF
 exit 0
 }
 
+ld_preload="@install_libexecdir@/libintel_dump_gpu.so${LD_PPRELOAD:+:$LD_PRELOAD}"
 args=""
 file=""
+gdb=""
 
 function add_arg() {
 arg=$1
@@ -60,6 +62,14 @@ while true; do
 add_arg "device=${1##--device=}"
 shift
 ;;
+--gdb)
+gdb=1
+shift
+;;
+-g)
+gdb=1
+shift
+;;
 --help)
 show_help
 ;;
@@ -85,9 +95,12 @@ done
 tmp_file=`mktemp`
 echo -e $args > $tmp_file
 
-LD_PRELOAD="@install_libexecdir@/libintel_dump_gpu.so${LD_PPRELOAD:+:$LD_PRELOAD}"
 \
-  INTEL_DUMP_GPU_CONFIG=$tmp_file \
-  $@
+if [ -z $gdb ]; then
+LD_PRELOAD="$ld_preload" INTEL_DUMP_GPU_CONFIG=$tmp_file $@
+else
+gdb -iex "set exec-wrapper env LD_PRELOAD=$ld_preload 
INTEL_DUMP_GPU_CONFIG=$tmp_file" --args $@
+fi
+
 ret=$?
 rm $tmp_file
 exit $ret
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Pay attention to VK_ACCESS_MEMORY_(READ|WRITE)_BIT

2018-07-20 Thread Lionel Landwerlin

On 20/07/18 11:44, Alex Smith wrote:

According to the spec, these should apply to all read/write access
types (so would be equivalent to specifying all other access types
individually). Currently, they were doing nothing.

Signed-off-by: Alex Smith 
Cc: mesa-sta...@lists.freedesktop.org
---
  src/intel/vulkan/anv_private.h | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index cec2842792..775bacaff2 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1731,6 +1731,9 @@ anv_pipe_flush_bits_for_access_flags(VkAccessFlags flags)
   pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
   pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
   break;
+  case VK_ACCESS_MEMORY_WRITE_BIT:
+ pipe_bits |= ANV_PIPE_FLUSH_BITS;
+ break;
default:
   break; /* Nothing to do */
}
@@ -1761,6 +1764,9 @@ anv_pipe_invalidate_bits_for_access_flags(VkAccessFlags 
flags)
case VK_ACCESS_TRANSFER_READ_BIT:
   pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
   break;
+  case VK_ACCESS_MEMORY_READ_BIT:
+ pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
+ break;


I know this function is a bit oddly named for that, but with this part 
of the spec regarding VK_ACCESS_MEMORY_WRITE_BIT :


"

 *

   When included in a destination access mask, makes all available
   writes visible to all future write accesses on entities known to the
   Vulkan device.

"

I would also add :

case VK_ACCESS_MEMORY_WRITE_BIT:
    pipe_bits |= ANV_PIPE_FLUSH_BITS;
    break;

Does that sound fair?

-
Lionel


default:
   break; /* Nothing to do */
}



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] intel: tools: dump: protect against multiple calls on destructor

2018-07-20 Thread Lionel Landwerlin

On 20/07/18 11:24, Lionel Landwerlin wrote:

Because we LD_PRELOAD in front of the command line, which might
contain gdb, the fini destructor in intel_dump_gpu.c might be called
twice (once for the program we're tracing and once for gdb). This
change prevents a crash in gdb when the aub->file has already been
closed.


This was found using coredumpctl :)



Signed-off-by: Lionel Landwerlin 
---
  src/intel/tools/aub_write.c  |  5 -
  src/intel/tools/intel_dump_gpu.c | 10 --
  2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
index 6fb99feb005..c948a4852b0 100644
--- a/src/intel/tools/aub_write.c
+++ b/src/intel/tools/aub_write.c
@@ -286,7 +286,10 @@ void
  aub_file_finish(struct aub_file *aub)
  {
 aub_ppgtt_table_finish(>pml4, 4);
-   fclose(aub->file);
+   if (aub->file) {
+  fclose(aub->file);
+  aub->file = NULL;
+   }
  }
  
  uint32_t

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 1abe54147cf..c4082c207f0 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -538,7 +538,13 @@ ioctl_init_helper(int fd, unsigned long request, ...)
  static void __attribute__ ((destructor))
  fini(void)
  {
-   free(output_filename);
 aub_file_finish(_file);
-   free(bos);
+   if (output_filename) {
+  free(output_filename);
+  output_filename = NULL;
+   }
+   if (bos) {
+  free(bos);
+  bos = NULL;
+   }
  }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] intel: tools: dump: make dump tool reliable under gdb

2018-07-20 Thread Lionel Landwerlin
The problem with passing the configuration of the dump lib through a
file descriptor is that it can be read only once. But under gdb you
might want to rerun your program multiple times.

This change hands the configuration through a temporary file that is
deleted once the command line passed to intel_dump_gpu has exited.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c  |  2 +-
 src/intel/tools/intel_dump_gpu.in | 11 ---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 6ce7d452afb..1abe54147cf 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -349,7 +349,7 @@ maybe_init(void)
 
initialized = true;
 
-   config = fdopen(3, "r");
+   config = fopen(getenv("INTEL_DUMP_GPU_CONFIG"), "r");
while (fscanf(config, "%m[^=]=%m[^\n]\n", , ) != EOF) {
   if (!strcmp(key, "verbose")) {
  if (!strcmp(value, "1")) {
diff --git a/src/intel/tools/intel_dump_gpu.in 
b/src/intel/tools/intel_dump_gpu.in
index 9eea37189db..0454cff25da 100755
--- a/src/intel/tools/intel_dump_gpu.in
+++ b/src/intel/tools/intel_dump_gpu.in
@@ -82,7 +82,12 @@ done
 
 [ -z $file ] && add_arg "file=intel.aub"
 
+tmp_file=`mktemp`
+echo -e $args > $tmp_file
+
 
LD_PRELOAD="@install_libexecdir@/libintel_dump_gpu.so${LD_PPRELOAD:+:$LD_PRELOAD}"
 \
-  exec -- "$@" 3<https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] intel: tools: dump: protect against multiple calls on destructor

2018-07-20 Thread Lionel Landwerlin
Because we LD_PRELOAD in front of the command line, which might
contain gdb, the fini destructor in intel_dump_gpu.c might be called
twice (once for the program we're tracing and once for gdb). This
change prevents a crash in gdb when the aub->file has already been
closed.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/aub_write.c  |  5 -
 src/intel/tools/intel_dump_gpu.c | 10 --
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
index 6fb99feb005..c948a4852b0 100644
--- a/src/intel/tools/aub_write.c
+++ b/src/intel/tools/aub_write.c
@@ -286,7 +286,10 @@ void
 aub_file_finish(struct aub_file *aub)
 {
aub_ppgtt_table_finish(>pml4, 4);
-   fclose(aub->file);
+   if (aub->file) {
+  fclose(aub->file);
+  aub->file = NULL;
+   }
 }
 
 uint32_t
diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 1abe54147cf..c4082c207f0 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -538,7 +538,13 @@ ioctl_init_helper(int fd, unsigned long request, ...)
 static void __attribute__ ((destructor))
 fini(void)
 {
-   free(output_filename);
aub_file_finish(_file);
-   free(bos);
+   if (output_filename) {
+  free(output_filename);
+  output_filename = NULL;
+   }
+   if (bos) {
+  free(bos);
+  bos = NULL;
+   }
 }
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Context aware user space EU control through application

2018-07-20 Thread Lionel Landwerlin

On 20/07/18 09:47, Chris Wilson wrote:

Quoting aravindan.muthuku...@intel.com (2018-07-20 09:32:57)

From: "Muthukumar, Aravindan" 

  The Patch here is to give control to user/ application to really
  decide what's the max GPU load it would put. If that can be
  known in advance, rpcs can be programmed accordingly.
  This solution has changes across i915,
  drm and mesa (not limited only to kernel).

  Here, we pass gpu_load_type = {high, medium, low} from application
  while context is created. Default here is 'High' and applications
  roughly know if they are going to eat up entire GPU. The typical
  usecase of 'Low' is idle screen or minor mouse movements. Users can
  read meaning of high/medium/low for their platform  & then program
  contexts accordingly. Here gpu_load_type directly translates to
  number of shader cores/EUs a particular GPU has.

  Signed-off-by: Aravindan Muthukumar 
  Signed-off-by: Kedar J Karanje 
  Signed-off-by: Praveen Diwakar 
  Signed-off-by: Yogesh Marathe 
+/* Dynamic Eu control */
+struct drm_i915_load_type {
+   __u32 ctx_id;
+   __u32 load_type;
+};
+
+/* DYNAMIC EU CONTROL */
+int
+brw_hw_context_load_type(struct brw_bufmgr *bufmgr,
+uint32_t ctx_id,
+int load_type)
+{
+   struct drm_i915_load_type type = {
+   .ctx_id = ctx_id,
+   .load_type = load_type,
+   };
+   int err;
+
+   err = 0;
+   if(drmIoctl(bufmgr->fd, DRM_IOCTL_I915_LOAD_TYPE, ))
+   err = -errno;

This went through 4 people and none noticed that there already exists a
means to set per-context parameters. And it's even used right next to
this function.

The word hint needs to be firmly embedded around here.
-Chris
__


Yep,

Looks like you want to get involved in this discussion : 
https://patchwork.freedesktop.org/series/42285/


-
Lionel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] RFC : Context aware user space Resource control

2018-07-20 Thread Lionel Landwerlin

On 20/07/18 09:32, aravindan.muthuku...@intel.com wrote:

diff --git a/src/egl/generate/egl.xml b/src/egl/generate/egl.xml
index 9250f93..52b0c9f 100644
--- a/src/egl/generate/egl.xml
+++ b/src/egl/generate/egl.xml
@@ -460,6 +460,14 @@
  
  
  
+

+
+
+
+
+
+
+
  
  
  


Are you defining a new extension here?
The chunk above seems to imply this is from IMG, but still commented as 
reserved.


As far as I understand, enums need to be allocated through the Khronos group 
before you can make use of them.
Otherwise this will be unusable by applications as soon as another 
extension reuses the same enums...


Thanks,

-
Lionel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 4/4] intel: tools: dump: trace memory writes

2018-07-19 Thread Lionel Landwerlin

On 18/07/18 21:58, Rafael Antognolli wrote:

On Wed, Jul 18, 2018 at 06:21:32PM +0100, Lionel Landwerlin wrote:

Signed-off-by: Lionel Landwerlin 
---
  src/intel/tools/aub_write.c | 45 ++---
  1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
index de4ce33..9c140553542 100644
--- a/src/intel/tools/aub_write.c
+++ b/src/intel/tools/aub_write.c
@@ -313,10 +313,17 @@ dword_out(struct aub_file *aub, uint32_t data)
  
  static void

  mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
-  uint32_t len, uint32_t addr_space)
+  uint32_t len, uint32_t addr_space,
+  const char *desc)

Looks like you are not using desc anywhere...

Other than that, things look good.


Duh! Fixed locally.
Counts as Rb?

Thanks,

-
Lionel




  {
 uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);
  
+   if (aub->verbose_log_file) {

+  fprintf(aub->verbose_log_file,
+  "  MEM WRITE (0x%016" PRIx64 "-0x%016" PRIx64 ")\n",
+  addr, addr + len);
+   }
+
 dword_out(aub, CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1));
 dword_out(aub, addr & 0x);   /* addr lo */
 dword_out(aub, addr >> 32);   /* addr hi */
@@ -387,7 +394,8 @@ populate_ppgtt_table(struct aub_file *aub, struct 
aub_ppgtt_table *table,
uint64_t write_size = (dirty_end - dirty_start + 1) *
   sizeof(uint64_t);
mem_trace_memory_write_header_out(aub, write_addr, write_size,
-
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL);
+
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL,
+"PPGTT update");
data_out(aub, entries + dirty_start, write_size);
 }
  }
@@ -476,7 +484,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
  
 mem_trace_memory_write_header_out(aub, STATIC_GGTT_MAP_START >> 12,

   ggtt_ptes * GEN8_PTE_SIZE,
- 
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY);
+ 
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
+ "GGTT PT");
 for (uint32_t i = 0; i < ggtt_ptes; i++) {
dword_out(aub, 1 + 0x1000 * i + STATIC_GGTT_MAP_START);
dword_out(aub, 0);
@@ -484,7 +493,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
  
 /* RENDER_RING */

 mem_trace_memory_write_header_out(aub, RENDER_RING_ADDR, RING_SIZE,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "RENDER RING");
 for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
dword_out(aub, 0);
  
@@ -492,7 +502,8 @@ write_execlists_header(struct aub_file *aub, const char *name)

 mem_trace_memory_write_header_out(aub, RENDER_CONTEXT_ADDR,
   PPHWSP_SIZE +
   sizeof(render_context_init),
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "RENDER PPHWSP");
 for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
dword_out(aub, 0);
  
@@ -501,7 +512,8 @@ write_execlists_header(struct aub_file *aub, const char *name)
  
 /* BLITTER_RING */

 mem_trace_memory_write_header_out(aub, BLITTER_RING_ADDR, RING_SIZE,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "BLITTER RING");
 for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
dword_out(aub, 0);
  
@@ -509,7 +521,8 @@ write_execlists_header(struct aub_file *aub, const char *name)

 mem_trace_memory_write_header_out(aub, BLITTER_CONTEXT_ADDR,
   PPHWSP_SIZE +
   sizeof(blitter_context_init),
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "BLITTER PPHWSP");
 for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
dword_out(aub, 0);
  
@@ -518,7 +531,8 @@ write_execlists_header(struct aub_file *aub, const char *name)
  
 /* VIDEO_RING */

 mem_trace_memory_

Re: [Mesa-dev] [PATCH v2 1/4] intel: tools: dump: remove command execution feature

2018-07-19 Thread Lionel Landwerlin

Was that for the whole series, or just this patch? :)

Thanks,

-
Lionel

On 18/07/18 21:42, Jason Ekstrand wrote:

Very sketchily

Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net>


On Wed, Jul 18, 2018 at 10:21 AM Lionel Landwerlin 
<lionel.g.landwer...@intel.com> wrote:


In commit 86cb05a6d35a52 ("intel: aubinator: remove standard input
processing option") we removed the ability to process aub as an input
stream because we now rely on mmapping the aub file to back the
buffers aubinator is parsing.

intel_aubdump was the provider of the standard input data and since
we've copied/reworked intel_aubdump into intel_dump_gpu within Mesa,
we don't need that code anymore.

    Signed-off-by: Lionel Landwerlin mailto:lionel.g.landwer...@intel.com>>
---
 src/intel/tools/intel_dump_gpu.c  | 121
+++---
 src/intel/tools/intel_dump_gpu.in <http://intel_dump_gpu.in> | 
27 +--
 2 files changed, 29 insertions(+), 119 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c
b/src/intel/tools/intel_dump_gpu.c
index 6d2c4b7f983..5fd2c8ea723 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -53,8 +53,8 @@ static int (*libc_close)(int fd) =
close_init_helper;
 static int (*libc_ioctl)(int fd, unsigned long request, ...) =
ioctl_init_helper;

 static int drm_fd = -1;
-static char *filename = NULL;
-static FILE *files[2] = { NULL, NULL };
+static char *output_filename = NULL;
+static FILE *output_file = NULL;
 static int verbose = 0;
 static bool device_override;

@@ -111,7 +111,7 @@ align_u32(uint32_t v, uint32_t a)

 static struct gen_device_info devinfo = {0};
 static uint32_t device;
-static struct aub_file aubs[2];
+static struct aub_file aub_file;

 static void *
 relocate_bo(struct bo *bo, const struct drm_i915_gem_execbuffer2
*execbuffer2,
@@ -205,28 +205,21 @@ dump_execbuffer2(int fd, struct
drm_i915_gem_execbuffer2 *execbuffer2)
       fail_if(!gen_get_device_info(device, ),
               "failed to identify chipset=0x%x\n", device);

-      for (int i = 0; i < ARRAY_SIZE(files); i++) {
-         if (files[i] != NULL) {
-            aub_file_init([i], files[i], device);
-            if (verbose == 2)
-               aubs[i].verbose_log_file = stdout;
-            aub_write_header([i],
program_invocation_short_name);
-         }
-      }
+      aub_file_init(_file, output_file, device);
+      if (verbose == 2)
+         aub_file.verbose_log_file = stdout;
+      aub_write_header(_file, program_invocation_short_name);

       if (verbose)
          printf("[intel_aubdump: running, "
                 "output file %s, chipset id 0x%04x, gen %d]\n",
-                filename, device, devinfo.gen);
+                output_filename, device, devinfo.gen);
    }

-   /* Any aub */
-   struct aub_file *any_aub = files[0] ? [0] : [1];;
-
-   if (aub_use_execlists(any_aub))
+   if (aub_use_execlists(_file))
       offset = 0x1000;
    else
-      offset = aub_gtt_size(any_aub);
+      offset = aub_gtt_size(_file);

    if (verbose)
       printf("Dumping execbuffer2:\n");
@@ -263,13 +256,8 @@ dump_execbuffer2(int fd, struct
drm_i915_gem_execbuffer2 *execbuffer2)
          bo->map = gem_mmap(fd, obj->handle, 0, bo->size);
       fail_if(bo->map == MAP_FAILED, "intel_aubdump: bo mmap
failed\n");

-      for (int i = 0; i < ARRAY_SIZE(files); i++) {
-         if (files[i] == NULL)
-            continue;
-
-         if (aub_use_execlists([i]))
-            aub_map_ppgtt([i], bo->offset, bo->size);
-      }
+      if (aub_use_execlists(_file))
+         aub_map_ppgtt(_file, bo->offset, bo->size);
    }

    batch_index = (execbuffer2->flags & I915_EXEC_BATCH_FIRST) ? 0 :
@@ -284,30 +272,21 @@ dump_execbuffer2(int fd, struct
drm_i915_gem_execbuffer2 *execbuffer2)
       else
          data = bo->map;

-      for (int i = 0; i < ARRAY_SIZE(files); i++) {
-         if (files[i] == NULL)
-            continue;
-
-         if (bo == batch_bo) {
-            aub_write_trace_block([i], AUB_TRACE_TYPE_BATCH,
-                                  GET_PTR(data), bo->size,
bo->offset);
-         } else {
-            aub_write_trace_block([i], AUB_TRACE_TYPE_NOTYPE,
-                                  GET_PTR(data), bo->size,
bo->offset);
-         }
+      if (bo == batch_bo) {
+         aub_write_trace_block(_file, AUB_TRACE_TYPE_BATCH,
+                               GET_PTR(data), bo->siz

Re: [Mesa-dev] [PATCH] intel/tools: fix segfault with intel_dump_gpu

2018-07-19 Thread Lionel Landwerlin

Hey Jordan,

I have a patch that removes this for loop, reviewed by Jason.
Landing it right now, that should fix this problem.

Thanks,

-
Lionel

On 19/07/18 09:52, Jordan Justen wrote:

Cc: Jason Ekstrand 
Cc: Lionel Landwerlin 
Fixes: 0a457d987ee "intel/tools: Refactor aub dumping to remove singletons"
Signed-off-by: Jordan Justen 
---
  src/intel/tools/intel_dump_gpu.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 6d2c4b7f983..e0ff1245925 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -301,7 +301,7 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
 }
  
 for (int i = 0; i < ARRAY_SIZE(files); i++) {

-  if (files[i] != NULL)
+  if (files[i] == NULL)
   continue;
  
aub_write_exec([i],



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/4] intel: tools: aubwrite: fix invalid frees on finish

2018-07-18 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/aub_write.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
index 1224e8f6b7f..de4ce33 100644
--- a/src/intel/tools/aub_write.c
+++ b/src/intel/tools/aub_write.c
@@ -255,11 +255,16 @@ align_u32(uint32_t v, uint32_t a)
 }
 
 static void
-aub_ppgtt_table_finish(struct aub_ppgtt_table *table)
+aub_ppgtt_table_finish(struct aub_ppgtt_table *table, int level)
 {
+   if (level == 1)
+  return;
+
for (unsigned i = 0; i < ARRAY_SIZE(table->subtables); i++) {
-  aub_ppgtt_table_finish(table->subtables[i]);
-  free(table->subtables[i]);
+  if (table->subtables[i]) {
+ aub_ppgtt_table_finish(table->subtables[i], level - 1);
+ free(table->subtables[i]);
+  }
}
 }
 
@@ -280,7 +285,7 @@ aub_file_init(struct aub_file *aub, FILE *file, uint16_t 
pci_id)
 void
 aub_file_finish(struct aub_file *aub)
 {
-   aub_ppgtt_table_finish(>pml4);
+   aub_ppgtt_table_finish(>pml4, 4);
fclose(aub->file);
 }
 
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/4] intel: tools: dump: remove mentions of intel_aubdump

2018-07-18 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c | 30 +++---
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 5fd2c8ea723..6ce7d452afb 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -86,6 +86,7 @@ fail_if(int cond, const char *format, ...)
   return;
 
va_start(args, format);
+   fprintf(stderr, "intel_dump_gpu: ");
vfprintf(stderr, format, args);
va_end(args);
 
@@ -125,10 +126,10 @@ relocate_bo(struct bo *bo, const struct 
drm_i915_gem_execbuffer2 *execbuffer2,
int handle;
 
relocated = malloc(bo->size);
-   fail_if(relocated == NULL, "intel_aubdump: out of memory\n");
+   fail_if(relocated == NULL, "out of memory\n");
memcpy(relocated, GET_PTR(bo->map), bo->size);
for (size_t i = 0; i < obj->relocation_count; i++) {
-  fail_if(relocs[i].offset >= bo->size, "intel_aubdump: reloc outside 
bo\n");
+  fail_if(relocs[i].offset >= bo->size, "reloc outside bo\n");
 
   if (execbuffer2->flags & I915_EXEC_HANDLE_LUT)
  handle = exec_objects[relocs[i].target_handle].handle;
@@ -211,8 +212,7 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
   aub_write_header(_file, program_invocation_short_name);
 
   if (verbose)
- printf("[intel_aubdump: running, "
-"output file %s, chipset id 0x%04x, gen %d]\n",
+ printf("[running, output file %s, chipset id 0x%04x, gen %d]\n",
 output_filename, device, devinfo.gen);
}
 
@@ -254,7 +254,7 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
 
   if (bo->map == NULL && bo->size > 0)
  bo->map = gem_mmap(fd, obj->handle, 0, bo->size);
-  fail_if(bo->map == MAP_FAILED, "intel_aubdump: bo mmap failed\n");
+  fail_if(bo->map == MAP_FAILED, "bo mmap failed\n");
 
   if (aub_use_execlists(_file))
  aub_map_ppgtt(_file, bo->offset, bo->size);
@@ -310,8 +310,8 @@ add_new_bo(int handle, uint64_t size, void *map)
 {
struct bo *bo = [handle];
 
-   fail_if(handle >= MAX_BO_COUNT, "intel_aubdump: bo handle out of range\n");
-   fail_if(size == 0, "intel_aubdump: bo size is invalid\n");
+   fail_if(handle >= MAX_BO_COUNT, "bo handle out of range\n");
+   fail_if(size == 0, "bo size is invalid\n");
 
bo->size = size;
bo->map = map;
@@ -359,17 +359,17 @@ maybe_init(void)
  }
   } else if (!strcmp(key, "device")) {
  fail_if(sscanf(value, "%i", ) != 1,
- "intel_aubdump: failed to parse device id '%s'",
+ "failed to parse device id '%s'",
  value);
  device_override = true;
   } else if (!strcmp(key, "file")) {
  output_filename = strdup(value);
  output_file = fopen(output_filename, "w+");
  fail_if(output_file == NULL,
- "intel_aubdump: failed to open file '%s'\n",
+ "failed to open file '%s'\n",
  output_filename);
   } else {
- fprintf(stderr, "intel_aubdump: unknown option '%s'\n", key);
+ fprintf(stderr, "unknown option '%s'\n", key);
   }
 
   free(key);
@@ -378,7 +378,7 @@ maybe_init(void)
fclose(config);
 
bos = calloc(MAX_BO_COUNT, sizeof(bos[0]));
-   fail_if(bos == NULL, "intel_aubdump: out of memory\n");
+   fail_if(bos == NULL, "out of memory\n");
 }
 
 __attribute__ ((visibility ("default"))) int
@@ -398,7 +398,7 @@ ioctl(int fd, unsigned long request, ...)
(buf.st_mode & S_IFMT) == S_IFCHR && major(buf.st_rdev) == DRM_MAJOR) {
   drm_fd = fd;
   if (verbose)
- printf("[intel_aubdump: intercept drm ioctl on fd %d]\n", fd);
+ printf("[intercept drm ioctl on fd %d]\n", fd);
}
 
if (fd == drm_fd) {
@@ -428,7 +428,7 @@ ioctl(int fd, unsigned long request, ...)
   case DRM_IOCTL_I915_GEM_EXECBUFFER: {
  static bool once;
  if (!once) {
-fprintf(stderr, "intel_aubdump: "
+fprintf(stderr,
 "application uses DRM_IOCTL_I915_GEM_EXECBUFFER, not 
handled\n");
 once = true;
  }
@@ -490,7 +490,7 @@ ioctl(int fd, unsigned long request, ...)
 off_t size;
 
 size = lseek(prime->fd, 0, SEEK_END);
-fail_if(size == -1, "intel_aubdump: failed to get prime bo 
size\n");
+fail_if(size == -1, "failed to get prime bo size\n");
 add_new_bo(pri

[Mesa-dev] [PATCH v2 1/4] intel: tools: dump: remove command execution feature

2018-07-18 Thread Lionel Landwerlin
In commit 86cb05a6d35a52 ("intel: aubinator: remove standard input
processing option") we removed the ability to process aub as an input
stream because we now rely on mmapping the aub file to back the
buffers aubinator is parsing.

intel_aubdump was the provider of the standard input data and since
we've copied/reworked intel_aubdump into intel_dump_gpu within Mesa,
we don't need that code anymore.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c  | 121 +++---
 src/intel/tools/intel_dump_gpu.in |  27 +--
 2 files changed, 29 insertions(+), 119 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 6d2c4b7f983..5fd2c8ea723 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -53,8 +53,8 @@ static int (*libc_close)(int fd) = close_init_helper;
 static int (*libc_ioctl)(int fd, unsigned long request, ...) = 
ioctl_init_helper;
 
 static int drm_fd = -1;
-static char *filename = NULL;
-static FILE *files[2] = { NULL, NULL };
+static char *output_filename = NULL;
+static FILE *output_file = NULL;
 static int verbose = 0;
 static bool device_override;
 
@@ -111,7 +111,7 @@ align_u32(uint32_t v, uint32_t a)
 
 static struct gen_device_info devinfo = {0};
 static uint32_t device;
-static struct aub_file aubs[2];
+static struct aub_file aub_file;
 
 static void *
 relocate_bo(struct bo *bo, const struct drm_i915_gem_execbuffer2 *execbuffer2,
@@ -205,28 +205,21 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
   fail_if(!gen_get_device_info(device, ),
   "failed to identify chipset=0x%x\n", device);
 
-  for (int i = 0; i < ARRAY_SIZE(files); i++) {
- if (files[i] != NULL) {
-aub_file_init([i], files[i], device);
-if (verbose == 2)
-   aubs[i].verbose_log_file = stdout;
-aub_write_header([i], program_invocation_short_name);
- }
-  }
+  aub_file_init(_file, output_file, device);
+  if (verbose == 2)
+ aub_file.verbose_log_file = stdout;
+  aub_write_header(_file, program_invocation_short_name);
 
   if (verbose)
  printf("[intel_aubdump: running, "
 "output file %s, chipset id 0x%04x, gen %d]\n",
-filename, device, devinfo.gen);
+output_filename, device, devinfo.gen);
}
 
-   /* Any aub */
-   struct aub_file *any_aub = files[0] ? [0] : [1];;
-
-   if (aub_use_execlists(any_aub))
+   if (aub_use_execlists(_file))
   offset = 0x1000;
else
-  offset = aub_gtt_size(any_aub);
+  offset = aub_gtt_size(_file);
 
if (verbose)
   printf("Dumping execbuffer2:\n");
@@ -263,13 +256,8 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
  bo->map = gem_mmap(fd, obj->handle, 0, bo->size);
   fail_if(bo->map == MAP_FAILED, "intel_aubdump: bo mmap failed\n");
 
-  for (int i = 0; i < ARRAY_SIZE(files); i++) {
- if (files[i] == NULL)
-continue;
-
- if (aub_use_execlists([i]))
-aub_map_ppgtt([i], bo->offset, bo->size);
-  }
+  if (aub_use_execlists(_file))
+ aub_map_ppgtt(_file, bo->offset, bo->size);
}
 
batch_index = (execbuffer2->flags & I915_EXEC_BATCH_FIRST) ? 0 :
@@ -284,30 +272,21 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
   else
  data = bo->map;
 
-  for (int i = 0; i < ARRAY_SIZE(files); i++) {
- if (files[i] == NULL)
-continue;
-
- if (bo == batch_bo) {
-aub_write_trace_block([i], AUB_TRACE_TYPE_BATCH,
-  GET_PTR(data), bo->size, bo->offset);
- } else {
-aub_write_trace_block([i], AUB_TRACE_TYPE_NOTYPE,
-  GET_PTR(data), bo->size, bo->offset);
- }
+  if (bo == batch_bo) {
+ aub_write_trace_block(_file, AUB_TRACE_TYPE_BATCH,
+   GET_PTR(data), bo->size, bo->offset);
+  } else {
+ aub_write_trace_block(_file, AUB_TRACE_TYPE_NOTYPE,
+   GET_PTR(data), bo->size, bo->offset);
   }
+
   if (data != bo->map)
  free(data);
}
 
-   for (int i = 0; i < ARRAY_SIZE(files); i++) {
-  if (files[i] != NULL)
- continue;
-
-  aub_write_exec([i],
- batch_bo->offset + execbuffer2->batch_start_offset,
- offset, ring_flag);
-   }
+   aub_write_exec(_file,
+  batch_bo->offset + execbuffer2->batch_start_offset,
+  offset, ring_flag);
 
if (device_override &&
(execbuffer2->flags & I915_EXEC_FENCE_ARRAY) != 0) {
@@ -358,40 +337,6 @@ close(int fd)
return libc_c

[Mesa-dev] [PATCH v2 4/4] intel: tools: dump: trace memory writes

2018-07-18 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/aub_write.c | 45 ++---
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
index de4ce33..9c140553542 100644
--- a/src/intel/tools/aub_write.c
+++ b/src/intel/tools/aub_write.c
@@ -313,10 +313,17 @@ dword_out(struct aub_file *aub, uint32_t data)
 
 static void
 mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
-  uint32_t len, uint32_t addr_space)
+  uint32_t len, uint32_t addr_space,
+  const char *desc)
 {
uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);
 
+   if (aub->verbose_log_file) {
+  fprintf(aub->verbose_log_file,
+  "  MEM WRITE (0x%016" PRIx64 "-0x%016" PRIx64 ")\n",
+  addr, addr + len);
+   }
+
dword_out(aub, CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1));
dword_out(aub, addr & 0x);   /* addr lo */
dword_out(aub, addr >> 32);   /* addr hi */
@@ -387,7 +394,8 @@ populate_ppgtt_table(struct aub_file *aub, struct 
aub_ppgtt_table *table,
   uint64_t write_size = (dirty_end - dirty_start + 1) *
  sizeof(uint64_t);
   mem_trace_memory_write_header_out(aub, write_addr, write_size,
-
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL);
+
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL,
+"PPGTT update");
   data_out(aub, entries + dirty_start, write_size);
}
 }
@@ -476,7 +484,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
 
mem_trace_memory_write_header_out(aub, STATIC_GGTT_MAP_START >> 12,
  ggtt_ptes * GEN8_PTE_SIZE,
- 
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY);
+ 
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
+ "GGTT PT");
for (uint32_t i = 0; i < ggtt_ptes; i++) {
   dword_out(aub, 1 + 0x1000 * i + STATIC_GGTT_MAP_START);
   dword_out(aub, 0);
@@ -484,7 +493,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
 
/* RENDER_RING */
mem_trace_memory_write_header_out(aub, RENDER_RING_ADDR, RING_SIZE,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "RENDER RING");
for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
   dword_out(aub, 0);
 
@@ -492,7 +502,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
mem_trace_memory_write_header_out(aub, RENDER_CONTEXT_ADDR,
  PPHWSP_SIZE +
  sizeof(render_context_init),
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "RENDER PPHWSP");
for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
   dword_out(aub, 0);
 
@@ -501,7 +512,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
 
/* BLITTER_RING */
mem_trace_memory_write_header_out(aub, BLITTER_RING_ADDR, RING_SIZE,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "BLITTER RING");
for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
   dword_out(aub, 0);
 
@@ -509,7 +521,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
mem_trace_memory_write_header_out(aub, BLITTER_CONTEXT_ADDR,
  PPHWSP_SIZE +
  sizeof(blitter_context_init),
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "BLITTER PPHWSP");
for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
   dword_out(aub, 0);
 
@@ -518,7 +531,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
 
/* VIDEO_RING */
mem_trace_memory_write_header_out(aub, VIDEO_RING_ADDR, RING_SIZE,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "VIDEO RING");
for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
   dword_

Re: [Mesa-dev] [PATCH] intel: tools: dump: remove command execution feature

2018-07-18 Thread Lionel Landwerlin

No worries, I should have removed it when I included the tool.

Sending a v2.

On 18/07/18 17:13, Jason Ekstrand wrote:
I'm happy for us to do this and it would have made my life easier when 
refactoring the aub_write code. Unfortunately, since I just landed 
that, this will need some rebasing.   Sorry, I didn't realize how 
you'd intended things to be ordered before I landed my refactors. :-(


On Wed, Jul 18, 2018 at 7:20 AM Lionel Landwerlin
<lionel.g.landwer...@intel.com> wrote:


In commit 86cb05a6d35a52 ("intel: aubinator: remove standard input
processing option") we removed the ability to process aub as an input
stream because we now rely on mmapping the aub file to back the
buffer aubinator is parsing.

intel_aubdump was the provider of the standard input data and since
we've copied/reworked intel_aubdump into intel_dump_gpu within Mesa,
we don't need that code anymore.

    Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
 src/intel/tools/intel_dump_gpu.c  | 73
---
 src/intel/tools/intel_dump_gpu.in | 27
+---
 2 files changed, 10 insertions(+), 90 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c
b/src/intel/tools/intel_dump_gpu.c
index 766ba662d91..6758cab13c4 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -254,7 +254,7 @@ static int (*libc_ioctl)(int fd, unsigned long
request, ...) = ioctl_init_helper

 static int drm_fd = -1;
 static char *filename = NULL;
-static FILE *files[2] = { NULL, NULL };
+static FILE *aub_file = NULL;
 static struct gen_device_info devinfo = {0};
 static int verbose = 0;
 static bool device_override;
@@ -320,13 +320,8 @@ align_u32(uint32_t v, uint32_t a)
 static void
 dword_out(uint32_t data)
 {
-   for (int i = 0; i < ARRAY_SIZE (files); i++) {
-      if (files[i] == NULL)
-         continue;
-
-      fail_if(fwrite(, 1, 4, files[i]) == 0,
-              "Writing to output failed\n");
-   }
+   fail_if(fwrite(, 1, 4, aub_file) == 0,
+           "Writing to output failed\n");
 }

 static void
@@ -335,13 +330,8 @@ data_out(const void *data, size_t size)
    if (size == 0)
       return;

-   for (int i = 0; i < ARRAY_SIZE (files); i++) {
-      if (files[i] == NULL)
-         continue;
-
-      fail_if(fwrite(data, 1, size, files[i]) == 0,
-              "Writing to output failed\n");
-   }
+   fail_if(fwrite(data, 1, size, aub_file) == 0,
+           "Writing to output failed\n");
 }

 static uint32_t
@@ -990,10 +980,7 @@ dump_execbuffer2(int fd, struct
drm_i915_gem_execbuffer2 *execbuffer2)
                           ring_flag);
    }

-   for (int i = 0; i < ARRAY_SIZE(files); i++) {
-      if (files[i] != NULL)
-         fflush(files[i]);
-   }
+   fflush(aub_file);

    if (device_override &&
        (execbuffer2->flags & I915_EXEC_FENCE_ARRAY) != 0) {
@@ -1044,40 +1031,6 @@ close(int fd)
    return libc_close(fd);
 }

-static FILE *
-launch_command(char *command)
-{
-   int i = 0, fds[2];
-   char **args = calloc(strlen(command), sizeof(char *));
-   char *iter = command;
-
-   args[i++] = iter = command;
-
-   while ((iter = strstr(iter, ",")) != NULL) {
-      *iter = '\0';
-      iter += 1;
-      args[i++] = iter;
-   }
-
-   if (pipe(fds) == -1)
-      return NULL;
-
-   switch (fork()) {
-   case 0:
-      dup2(fds[0], 0);
-      fail_if(execvp(args[0], args) == -1,
-              "intel_aubdump: failed to launch child command\n");
-      return NULL;
-
-   default:
-      free(args);
-      return fdopen(fds[1], "w");
-
-   case -1:
-      return NULL;
-   }
-}
-
 static void
 maybe_init(void)
 {
@@ -1105,15 +1058,10 @@ maybe_init(void)
          device_override = true;
       } else if (!strcmp(key, "file")) {
          filename = strdup(value);
-         files[0] = fopen(filename, "w+");
-         fail_if(files[0] == NULL,
+         aub_file = fopen(filename, "w+");
+         fail_if(aub_file == NULL,
                  "intel_aubdump: failed to open file '%s'\n",
                  filename);
-      } else if (!strcmp(key,  "command")) {
-         files[1] = launch_command(value);
-         fail_if(files[1] == NULL,
-                 "intel_aubdump: failed to launch command '%s'\n",
-                 value);
       } else {
          fprintf(

Re: [Mesa-dev] [PATCH 1/4] intel/dump_gpu: Fix corner cases in PPGTT range calculations

2018-07-18 Thread Lionel Landwerlin

Thanks,

Reviewed-by: Lionel Landwerlin 

On 18/07/18 16:24, Jason Ekstrand wrote:
On Wed, Jul 18, 2018 at 3:08 AM Lionel Landwerlin
<lionel.g.landwer...@intel.com> wrote:


On 18/07/18 00:05, Jason Ekstrand wrote:
> For large buffers which span an entire l1 page table, we got the
range
> calculations wrong.  In this case, we end up with an l1_start
which is
> the first byte represented by the given l1 table and an l1_end
which is
> the first byte after the range represented by the l1 table.  Then
> l2_start_index == L2_index(l2_end) due to roll-over. Instead,
compute
> lN_end using (1Ull << shift) - 1 so that lN_end is the last byte
in the
> range represented by the Nth level page table.  When we do this, we
> don't need the conditional expression anymore.
> ---
>   src/intel/tools/intel_dump_gpu.c | 12 ++--
>   1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/src/intel/tools/intel_dump_gpu.c
b/src/intel/tools/intel_dump_gpu.c
> index 766ba662d91..5a0283960c1 100644
> --- a/src/intel/tools/intel_dump_gpu.c
> +++ b/src/intel/tools/intel_dump_gpu.c
> @@ -457,28 +457,28 @@ map_ppgtt(uint64_t start, uint64_t size)
>
>      for (uint64_t l4 = l4_start; l4 < l4_end; l4 += (1ULL << 39)) {
>         uint64_t l3_start = max(l4, start & 0xc000);
> -      uint64_t l3_end = min(l4 + (1ULL << 39),
> +      uint64_t l3_end = min(l4 + (1ULL << 39) - 1,
>                               ((start + size - 1) |
0x3fff) & 0x);
>         uint64_t l3_start_idx = L3_index(l3_start);
> -      uint64_t l3_end_idx = L3_index(l3_start) >= l3_start_idx
? L3_index(l3_end) : 0x1ff;
> +      uint64_t l3_end_idx = L3_index(l3_start);

uint64_t l3_end_idx = L3_index(l3_end);


Right.  Fixed locally.  Review?

>
>         populate_ppgtt_table(L3_table(l4), l3_start_idx,
l3_end_idx, 3);
>
>         for (uint64_t l3 = l3_start; l3 < l3_end; l3 += (1ULL <<
30)) {
>            uint64_t l2_start = max(l3, start & 0xffe0);
> -         uint64_t l2_end = min(l3 + (1ULL << 30),
> +         uint64_t l2_end = min(l3 + (1ULL << 30) - 1,
>                                  ((start + size - 1) |
0x001f) & 0x);
>            uint64_t l2_start_idx = L2_index(l2_start);
> -         uint64_t l2_end_idx = L2_index(l2_end) >= l2_start_idx
? L2_index(l2_end) : 0x1ff;
> +         uint64_t l2_end_idx = L2_index(l2_end);
>
>            populate_ppgtt_table(L2_table(l3), l2_start_idx,
l2_end_idx, 2);
>
>            for (uint64_t l2 = l2_start; l2 < l2_end; l2 += (1ULL
<< 21)) {
>               uint64_t l1_start = max(l2, start & 0xf000);
> -            uint64_t l1_end = min(l2 + (1ULL << 21),
> +            uint64_t l1_end = min(l2 + (1ULL << 21) - 1,
>                                     ((start + size - 1) |
0x0fff) & 0x);
>               uint64_t l1_start_idx = L1_index(l1_start);
> -            uint64_t l1_end_idx = L1_index(l1_end) >=
l1_start_idx ? L1_index(l1_end) : 0x1ff;
> +            uint64_t l1_end_idx = L1_index(l1_end);
>
>               populate_ppgtt_table(L1_table(l2), l1_start_idx,
l1_end_idx, 1);
>            }




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] intel/tools: Add an error state to aub translator

2018-07-18 Thread Lionel Landwerlin

Awesome tool! And not even too much code!

Reviewed-by: Lionel Landwerlin 

In the interest of consistency and because this is an installable 
binary, I would rename it.

Now for the hard part : intel_error2aub ?

-
Lionel

On 18/07/18 00:05, Jason Ekstrand wrote:

---
  src/intel/tools/error2aub.c | 332 
  src/intel/tools/meson.build |  11 ++
  2 files changed, 343 insertions(+)
  create mode 100644 src/intel/tools/error2aub.c

diff --git a/src/intel/tools/error2aub.c b/src/intel/tools/error2aub.c
new file mode 100644
index 000..ece41d93e6c
--- /dev/null
+++ b/src/intel/tools/error2aub.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright © 2007-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "aub_write.h"
+#include "i915_drm.h"
+#include "intel_aub.h"
+
+static void __attribute__ ((format(__printf__, 2, 3)))
+fail_if(int cond, const char *format, ...)
+{
+   va_list args;
+
+   if (!cond)
+  return;
+
+   va_start(args, format);
+   vfprintf(stderr, format, args);
+   va_end(args);
+
+   raise(SIGTRAP);
+}
+
+#define fail(...) fail_if(true, __VA_ARGS__)
+
+static int zlib_inflate(uint32_t **ptr, int len)
+{
+   struct z_stream_s zstream;
+   void *out;
+   const uint32_t out_size = 128*4096;  /* approximate obj size */
+
+   memset(, 0, sizeof(zstream));
+
+   zstream.next_in = (unsigned char *)*ptr;
+   zstream.avail_in = 4*len;
+
+   if (inflateInit() != Z_OK)
+  return 0;
+
+   out = malloc(out_size);
+   zstream.next_out = out;
+   zstream.avail_out = out_size;
+
+   do {
+  switch (inflate(, Z_SYNC_FLUSH)) {
+  case Z_STREAM_END:
+ goto end;
+  case Z_OK:
+ break;
+  default:
+ inflateEnd();
+ return 0;
+  }
+
+  if (zstream.avail_out)
+ break;
+
+  out = realloc(out, 2*zstream.total_out);
+  if (out == NULL) {
+ inflateEnd();
+ return 0;
+  }
+
+  zstream.next_out = (unsigned char *)out + zstream.total_out;
+  zstream.avail_out = zstream.total_out;
+   } while (1);
+ end:
+   inflateEnd();
+   free(*ptr);
+   *ptr = out;
+   return zstream.total_out / 4;
+}
+
+static int ascii85_decode(const char *in, uint32_t **out, bool inflate)
+{
+   int len = 0, size = 1024;
+
+   *out = realloc(*out, sizeof(uint32_t)*size);
+   if (*out == NULL)
+  return 0;
+
+   while (*in >= '!' && *in <= 'z') {
+  uint32_t v = 0;
+
+  if (len == size) {
+ size *= 2;
+ *out = realloc(*out, sizeof(uint32_t)*size);
+ if (*out == NULL)
+return 0;
+  }
+
+  if (*in == 'z') {
+ in++;
+  } else {
+ v += in[0] - 33; v *= 85;
+ v += in[1] - 33; v *= 85;
+ v += in[2] - 33; v *= 85;
+ v += in[3] - 33; v *= 85;
+ v += in[4] - 33;
+ in += 5;
+  }
+  (*out)[len++] = v;
+   }
+
+   if (!inflate)
+  return len;
+
+   return zlib_inflate(out, len);
+}
+
+static void
+print_help(const char *progname, FILE *file)
+{
+   fprintf(file,
+   "Usage: %s [OPTION]... [FILE]\n"
+   "Convert an Intel GPU i915 error state to an aub file.\n"
+   "  -h, --help  display this help and exit\n"
+   "  -o, --output=FILE   the output aub file (default FILE.aub)\n",
+   progname);
+}
+
+int
+main(int argc, char *argv[])
+{
+   int i, c;
+   bool help = false;
+   char *out_filename = NULL, *in_filename = NULL;
+   const struct option aubinator_opts[] = {
+  { "help",   no_argument,   NULL, 'h' },
+  { "output", required_argument, NULL, 'o' },
+  { NULL, 0, NULL, 0 }
+   }

Re: [Mesa-dev] [PATCH 3/4] intel/tools: Break aub file writing into a helper

2018-07-18 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 18/07/18 00:05, Jason Ekstrand wrote:

---
  src/intel/tools/aub_write.c  | 764 +++
  src/intel/tools/aub_write.h  |  96 
  src/intel/tools/intel_dump_gpu.c | 762 +-
  src/intel/tools/meson.build  |   2 +-
  4 files changed, 862 insertions(+), 762 deletions(-)
  create mode 100644 src/intel/tools/aub_write.c
  create mode 100644 src/intel/tools/aub_write.h

diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
new file mode 100644
index 000..2ec045830a8
--- /dev/null
+++ b/src/intel/tools/aub_write.c
@@ -0,0 +1,764 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "aub_write.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include "i915_drm.h"
+#include "intel_aub.h"
+
+#ifndef ALIGN
+#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
+#endif
+
+#define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
+#define MI_LRI_FORCE_POSTED   (1<<12)
+
+#define MI_BATCH_NON_SECURE_I965 (1 << 8)
+
+#define MI_BATCH_BUFFER_END (0xA << 23)
+
+#define min(a, b) ({\
+ __typeof(a) _a = (a);  \
+ __typeof(b) _b = (b);  \
+ _a < _b ? _a : _b; \
+  })
+
+#define max(a, b) ({\
+ __typeof(a) _a = (a);  \
+ __typeof(b) _b = (b);  \
+ _a > _b ? _a : _b; \
+  })
+
+#define HWS_PGA_RCSUNIT  0x02080
+#define HWS_PGA_VCSUNIT0   0x12080
+#define HWS_PGA_BCSUNIT  0x22080
+
+#define GFX_MODE_RCSUNIT   0x0229c
+#define GFX_MODE_VCSUNIT0   0x1229c
+#define GFX_MODE_BCSUNIT   0x2229c
+
+#define EXECLIST_SUBMITPORT_RCSUNIT   0x02230
+#define EXECLIST_SUBMITPORT_VCSUNIT0   0x12230
+#define EXECLIST_SUBMITPORT_BCSUNIT   0x22230
+
+#define EXECLIST_STATUS_RCSUNIT  0x02234
+#define EXECLIST_STATUS_VCSUNIT0   0x12234
+#define EXECLIST_STATUS_BCSUNIT  0x22234
+
+#define EXECLIST_SQ_CONTENTS0_RCSUNIT   0x02510
+#define EXECLIST_SQ_CONTENTS0_VCSUNIT0   0x12510
+#define EXECLIST_SQ_CONTENTS0_BCSUNIT   0x22510
+
+#define EXECLIST_CONTROL_RCSUNIT   0x02550
+#define EXECLIST_CONTROL_VCSUNIT0   0x12550
+#define EXECLIST_CONTROL_BCSUNIT   0x22550
+
+#define MEMORY_MAP_SIZE (64 /* MiB */ * 1024 * 1024)
+
+#define PTE_SIZE 4
+#define GEN8_PTE_SIZE 8
+
+#define NUM_PT_ENTRIES (ALIGN(MEMORY_MAP_SIZE, 4096) / 4096)
+#define PT_SIZE ALIGN(NUM_PT_ENTRIES * GEN8_PTE_SIZE, 4096)
+
+#define RING_SIZE (1 * 4096)
+#define PPHWSP_SIZE (1 * 4096)
+#define GEN11_LR_CONTEXT_RENDER_SIZE(14 * 4096)
+#define GEN10_LR_CONTEXT_RENDER_SIZE(19 * 4096)
+#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096)
+#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096)
+#define GEN8_LR_CONTEXT_OTHER_SIZE  (2 * 4096)
+
+
+#define STATIC_GGTT_MAP_START 0
+
+#define RENDER_RING_ADDR STATIC_GGTT_MAP_START
+#define RENDER_CONTEXT_ADDR (RENDER_RING_ADDR + RING_SIZE)
+
+#define BLITTER_RING_ADDR (RENDER_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN10_LR_CONTEXT_RENDER_SIZE)
+#define BLITTER_CONTEXT_ADDR (BLITTER_RING_ADDR + RING_SIZE)
+
+#define VIDEO_RING_ADDR (BLITTER_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
+#define VIDEO_CONTEXT_ADDR (VIDEO_RING_ADDR + RING_SIZE)
+
+#define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
+#define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
+
+#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END))
+
+#define CONTEXT_FLAGS (0x339)   /* Normal Priority | L3-LLC Coherency |
+ * PPGTT Enabled |
+

Re: [Mesa-dev] [PATCH 2/4] intel/tools: Refactor aub dumping to remove singletons

2018-07-18 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 18/07/18 00:05, Jason Ekstrand wrote:

Instead of having quite so many singletons, we use a struct aub_file to
organize the bits we need for writing an aub file.
---
  src/intel/tools/intel_dump_gpu.c | 498 ++-
  1 file changed, 287 insertions(+), 211 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 5a0283960c1..aad34cc24f4 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -255,11 +255,8 @@ static int (*libc_ioctl)(int fd, unsigned long request, 
...) = ioctl_init_helper
  static int drm_fd = -1;
  static char *filename = NULL;
  static FILE *files[2] = { NULL, NULL };
-static struct gen_device_info devinfo = {0};
  static int verbose = 0;
  static bool device_override;
-static uint32_t device;
-static int addr_bits = 0;
  
  #define MAX_BO_COUNT 64 * 1024
  
@@ -280,11 +277,6 @@ static struct bo *bos;

  #define IS_USERPTR(p) ((uintptr_t) (p) & USERPTR_FLAG)
  #define GET_PTR(p) ( (void *) ((uintptr_t) p & ~(uintptr_t) 1) )
  
-static inline bool use_execlists(void)

-{
-   return devinfo.gen >= 8;
-}
-
  static void __attribute__ ((format(__printf__, 2, 3)))
  fail_if(int cond, const char *format, ...)
  {
@@ -317,82 +309,121 @@ align_u32(uint32_t v, uint32_t a)
 return (v + a - 1) & ~(a - 1);
  }
  
+struct aub_ppgtt_table {

+   uint64_t phys_addr;
+   struct aub_ppgtt_table *subtables[512];
+};
+
  static void
-dword_out(uint32_t data)
+aub_ppgtt_table_finish(struct aub_ppgtt_table *table)
  {
-   for (int i = 0; i < ARRAY_SIZE (files); i++) {
-  if (files[i] == NULL)
- continue;
-
-  fail_if(fwrite(, 1, 4, files[i]) == 0,
-  "Writing to output failed\n");
+   for (unsigned i = 0; i < ARRAY_SIZE(table->subtables); i++) {
+  aub_ppgtt_table_finish(table->subtables[i]);
+  free(table->subtables[i]);
 }
  }
  
+struct aub_file {

+   FILE *file;
+
+   /* Set if you want extra logging */
+   FILE *verbose_log_file;
+
+   uint16_t pci_id;
+   struct gen_device_info devinfo;
+
+   int addr_bits;
+
+   struct aub_ppgtt_table pml4;
+};
+
  static void
-data_out(const void *data, size_t size)
+aub_file_init(struct aub_file *aub, FILE *file, uint16_t pci_id)
+{
+   memset(aub, 0, sizeof(*aub));
+
+   aub->file = file;
+   aub->pci_id = pci_id;
+   fail_if(!gen_get_device_info(pci_id, >devinfo),
+   "failed to identify chipset=0x%x\n", pci_id);
+   aub->addr_bits = aub->devinfo.gen >= 8 ? 48 : 32;
+
+   aub->pml4.phys_addr = PML4_PHYS_ADDR;
+}
+
+static void
+aub_file_finish(struct aub_file *aub)
+{
+   aub_ppgtt_table_finish(>pml4);
+   fclose(aub->file);
+}
+
+static inline bool aub_use_execlists(const struct aub_file *aub)
+{
+   return aub->devinfo.gen >= 8;
+}
+
+static void
+data_out(struct aub_file *aub, const void *data, size_t size)
  {
 if (size == 0)
return;
  
-   for (int i = 0; i < ARRAY_SIZE (files); i++) {

-  if (files[i] == NULL)
- continue;
+   fail_if(fwrite(data, 1, size, aub->file) == 0,
+   "Writing to output failed\n");
+}
  
-  fail_if(fwrite(data, 1, size, files[i]) == 0,

-  "Writing to output failed\n");
-   }
+static void
+dword_out(struct aub_file *aub, uint32_t data)
+{
+   data_out(aub, , sizeof(data));
  }
  
  static uint32_t

-gtt_size(void)
+aub_gtt_size(struct aub_file *aub)
  {
-   return NUM_PT_ENTRIES * (addr_bits > 32 ? GEN8_PTE_SIZE : PTE_SIZE);
+   return NUM_PT_ENTRIES * (aub->addr_bits > 32 ? GEN8_PTE_SIZE : PTE_SIZE);
  }
  
  static void

-mem_trace_memory_write_header_out(uint64_t addr, uint32_t len,
-  uint32_t addr_space)
+mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
+  uint32_t len, uint32_t addr_space)
  {
 uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);
  
-   dword_out(CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1));

-   dword_out(addr & 0x);   /* addr lo */
-   dword_out(addr >> 32);   /* addr hi */
-   dword_out(addr_space);   /* gtt */
-   dword_out(len);
+   dword_out(aub, CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1));
+   dword_out(aub, addr & 0x);   /* addr lo */
+   dword_out(aub, addr >> 32);   /* addr hi */
+   dword_out(aub, addr_space);   /* gtt */
+   dword_out(aub, len);
  }
  
  static void

-register_write_out(uint32_t addr, uint32_t value)
+register_write_out(struct aub_file *aub, uint32_t addr, uint32_t value)
  {
 uint32_t dwords = 1;
  
-   dword_out(CMD_MEM_TRACE_REGISTER_WRITE | (5 + dwords - 1));

-   dword_out(addr);
-   dword_out(AUB_MEM_TRACE_REGISTER_SIZE_DWORD |
- AUB_MEM_TRACE_REGISTER_SPACE_MMIO);
-   dword_out(0x);   /* mask lo */
-   dword_out(0x);   /* mask hi

[Mesa-dev] [PATCH] intel: tools: dump: remove command execution feature

2018-07-18 Thread Lionel Landwerlin
In commit 86cb05a6d35a52 ("intel: aubinator: remove standard input
processing option") we removed the ability to process aub as an input
stream because we now rely on mmapping the aub file to back the
buffer aubinator is parsing.

intel_aubdump was the provider of the standard input data and since
we've copied/reworked intel_aubdump into intel_dump_gpu within Mesa,
we don't need that code anymore.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c  | 73 ---
 src/intel/tools/intel_dump_gpu.in | 27 +---
 2 files changed, 10 insertions(+), 90 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 766ba662d91..6758cab13c4 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -254,7 +254,7 @@ static int (*libc_ioctl)(int fd, unsigned long request, 
...) = ioctl_init_helper
 
 static int drm_fd = -1;
 static char *filename = NULL;
-static FILE *files[2] = { NULL, NULL };
+static FILE *aub_file = NULL;
 static struct gen_device_info devinfo = {0};
 static int verbose = 0;
 static bool device_override;
@@ -320,13 +320,8 @@ align_u32(uint32_t v, uint32_t a)
 static void
 dword_out(uint32_t data)
 {
-   for (int i = 0; i < ARRAY_SIZE (files); i++) {
-  if (files[i] == NULL)
- continue;
-
-  fail_if(fwrite(, 1, 4, files[i]) == 0,
-  "Writing to output failed\n");
-   }
+   fail_if(fwrite(, 1, 4, aub_file) == 0,
+   "Writing to output failed\n");
 }
 
 static void
@@ -335,13 +330,8 @@ data_out(const void *data, size_t size)
if (size == 0)
   return;
 
-   for (int i = 0; i < ARRAY_SIZE (files); i++) {
-  if (files[i] == NULL)
- continue;
-
-  fail_if(fwrite(data, 1, size, files[i]) == 0,
-  "Writing to output failed\n");
-   }
+   fail_if(fwrite(data, 1, size, aub_file) == 0,
+   "Writing to output failed\n");
 }
 
 static uint32_t
@@ -990,10 +980,7 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
   ring_flag);
}
 
-   for (int i = 0; i < ARRAY_SIZE(files); i++) {
-  if (files[i] != NULL)
- fflush(files[i]);
-   }
+   fflush(aub_file);
 
if (device_override &&
(execbuffer2->flags & I915_EXEC_FENCE_ARRAY) != 0) {
@@ -1044,40 +1031,6 @@ close(int fd)
return libc_close(fd);
 }
 
-static FILE *
-launch_command(char *command)
-{
-   int i = 0, fds[2];
-   char **args = calloc(strlen(command), sizeof(char *));
-   char *iter = command;
-
-   args[i++] = iter = command;
-
-   while ((iter = strstr(iter, ",")) != NULL) {
-  *iter = '\0';
-  iter += 1;
-  args[i++] = iter;
-   }
-
-   if (pipe(fds) == -1)
-  return NULL;
-
-   switch (fork()) {
-   case 0:
-  dup2(fds[0], 0);
-  fail_if(execvp(args[0], args) == -1,
-  "intel_aubdump: failed to launch child command\n");
-  return NULL;
-
-   default:
-  free(args);
-  return fdopen(fds[1], "w");
-
-   case -1:
-  return NULL;
-   }
-}
-
 static void
 maybe_init(void)
 {
@@ -1105,15 +1058,10 @@ maybe_init(void)
  device_override = true;
   } else if (!strcmp(key, "file")) {
  filename = strdup(value);
- files[0] = fopen(filename, "w+");
- fail_if(files[0] == NULL,
+ aub_file = fopen(filename, "w+");
+ fail_if(aub_file == NULL,
  "intel_aubdump: failed to open file '%s'\n",
  filename);
-  } else if (!strcmp(key,  "command")) {
- files[1] = launch_command(value);
- fail_if(files[1] == NULL,
- "intel_aubdump: failed to launch command '%s'\n",
- value);
   } else {
  fprintf(stderr, "intel_aubdump: unknown option '%s'\n", key);
   }
@@ -1285,9 +1233,6 @@ static void __attribute__ ((destructor))
 fini(void)
 {
free(filename);
-   for (int i = 0; i < ARRAY_SIZE(files); i++) {
-  if (files[i] != NULL)
- fclose(files[i]);
-   }
+   fclose(aub_file);
free(bos);
 }
diff --git a/src/intel/tools/intel_dump_gpu.in 
b/src/intel/tools/intel_dump_gpu.in
index b9887f0ed2e..9eea37189db 100755
--- a/src/intel/tools/intel_dump_gpu.in
+++ b/src/intel/tools/intel_dump_gpu.in
@@ -10,9 +10,6 @@ contents and execution of the GEM application.
 
   -o, --output=FILE  Name of AUB file. Defaults to COMMAND.aub
 
-  -c, --command=CMD  Execute CMD and write the AUB file's content to its
- standard input
-
   --device=IDOverride PCI ID of the reported device
 
   -v Enable verbose output
@@ -27,7 +24,6 @@ EOF
 }
 
 args=""
-command=""
 file=""
 
 function add_arg() {
@@ -35,17 +31,6 @@ function add_arg() {
 args="$args$arg\n"
 }
 

Re: [Mesa-dev] [PATCH 1/4] intel/dump_gpu: Fix corner cases in PPGTT range calculations

2018-07-18 Thread Lionel Landwerlin

On 18/07/18 00:05, Jason Ekstrand wrote:

For large buffers which span an entire l1 page table, we got the range
calculations wrong.  In this case, we end up with an l1_start which is
the first byte represented by the given l1 table and an l1_end which is
the first byte after the range represented by the l1 table.  Then
l2_start_index == L2_index(l2_end) due to roll-over.  Instead, compute
lN_end using (1Ull << shift) - 1 so that lN_end is the last byte in the
range represented by the Nth level page table.  When we do this, we
don't need the conditional expression anymore.
---
  src/intel/tools/intel_dump_gpu.c | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 766ba662d91..5a0283960c1 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -457,28 +457,28 @@ map_ppgtt(uint64_t start, uint64_t size)
  
 for (uint64_t l4 = l4_start; l4 < l4_end; l4 += (1ULL << 39)) {

uint64_t l3_start = max(l4, start & 0xc000);
-  uint64_t l3_end = min(l4 + (1ULL << 39),
+  uint64_t l3_end = min(l4 + (1ULL << 39) - 1,
  ((start + size - 1) | 0x3fff) & 
0x);
uint64_t l3_start_idx = L3_index(l3_start);
-  uint64_t l3_end_idx = L3_index(l3_start) >= l3_start_idx ? 
L3_index(l3_end) : 0x1ff;
+  uint64_t l3_end_idx = L3_index(l3_start);


uint64_t l3_end_idx = L3_index(l3_end);

  
populate_ppgtt_table(L3_table(l4), l3_start_idx, l3_end_idx, 3);
  
for (uint64_t l3 = l3_start; l3 < l3_end; l3 += (1ULL << 30)) {

   uint64_t l2_start = max(l3, start & 0xffe0);
- uint64_t l2_end = min(l3 + (1ULL << 30),
+ uint64_t l2_end = min(l3 + (1ULL << 30) - 1,
 ((start + size - 1) | 0x001f) & 
0x);
   uint64_t l2_start_idx = L2_index(l2_start);
- uint64_t l2_end_idx = L2_index(l2_end) >= l2_start_idx ? 
L2_index(l2_end) : 0x1ff;
+ uint64_t l2_end_idx = L2_index(l2_end);
  
   populate_ppgtt_table(L2_table(l3), l2_start_idx, l2_end_idx, 2);
  
   for (uint64_t l2 = l2_start; l2 < l2_end; l2 += (1ULL << 21)) {

  uint64_t l1_start = max(l2, start & 0xf000);
-uint64_t l1_end = min(l2 + (1ULL << 21),
+uint64_t l1_end = min(l2 + (1ULL << 21) - 1,
((start + size - 1) | 0x0fff) & 
0x);
  uint64_t l1_start_idx = L1_index(l1_start);
-uint64_t l1_end_idx = L1_index(l1_end) >= l1_start_idx ? 
L1_index(l1_end) : 0x1ff;
+uint64_t l1_end_idx = L1_index(l1_end);
  
  populate_ppgtt_table(L1_table(l2), l1_start_idx, l1_end_idx, 1);

   }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] intel: Make the decoder handle STATE_BASE_ADDRESS not being a buffer.

2018-07-17 Thread Lionel Landwerlin

On 11/07/18 19:25, Kenneth Graunke wrote:

Normally, i965 programs STATE_BASE_ADDRESS every batch, and puts all
state for a given base in a single buffer.

I'm working on a prototype which emits STATE_BASE_ADDRESS only once at
startup, where each base address is a fixed 4GB region of the PPGTT.
State may live in many buffers in that 4GB region, even if there isn't
a buffer located at the actual base address itself.

To handle this, we need to save the STATE_BASE_ADDRESS values across
multiple batches, rather than assuming we'll see the command each time.
Then, each time we see a pointer, we need to ask the driver for the BO
map for that data.  (We can't just use the map for the base address, as
state may be in multiple buffers, and there may not even be a buffer
at the base address to map.)
---
  src/intel/common/gen_batch_decoder.c | 83 
  src/intel/common/gen_decoder.h   |  9 ++-
  2 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index fe7536da9ec..6cb66bcb257 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -128,13 +128,13 @@ static void
  ctx_disassemble_program(struct gen_batch_decode_ctx *ctx,
  uint32_t ksp, const char *type)
  {
-   if (!ctx->instruction_base.map)
+   uint64_t addr = ctx->instruction_base.addr + ksp;
+   struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr);
+   if (!bo.map)
return;
  
-   printf("\nReferenced %s:\n", type);

-   gen_disasm_disassemble(ctx->disasm,
-  (void *)ctx->instruction_base.map, ksp,
-  ctx->fp);
+   fprintf(ctx->fp, "\nReferenced %s:\n", type);
+   gen_disasm_disassemble(ctx->disasm, bo.map, 0, ctx->fp);
  }
  
  /* Heuristic to determine whether a uint32_t is probably actually a float

@@ -225,35 +225,30 @@ dump_binding_table(struct gen_batch_decode_ctx *ctx, 
uint32_t offset, int count)
 if (count < 0)
count = update_count(ctx, offset, 1, 8);
  
-   if (ctx->surface_base.map == NULL) {

+   struct gen_batch_decode_bo bind_bo =
+  ctx_get_bo(ctx, ctx->surface_base.addr + offset);
+
+   if (bind_bo.map == NULL) {
fprintf(ctx->fp, "  binding table unavailable\n");
return;
 }
  
-   if (offset % 32 != 0 || offset >= UINT16_MAX ||

-   offset >= ctx->surface_base.size) {
+   if (offset % 32 != 0 || offset >= UINT16_MAX || offset >= bind_bo.size) {


I wonder if this

offset >= bind_bo.size

is right. That's assuming bind_bo.addr == ctx->surface_base.addr, but in 
your prototype it probably won't be, right?



I would check (ctx->surface_base.addr + offset) >= (bind_bo.addr + 
bind_bo.size)



fprintf(ctx->fp, "  invalid binding table pointer\n");
return;
 }
  
-   struct gen_batch_decode_bo bo = ctx->surface_base;

-   const uint32_t *pointers = ctx->surface_base.map + offset;
+   const uint32_t *pointers = bind_bo.map;
 for (int i = 0; i < count; i++) {
if (pointers[i] == 0)
   continue;
  
-  if (pointers[i] % 32 != 0) {

- fprintf(ctx->fp, "pointer %u: %08x \n", i, pointers[i]);
- continue;
-  }
-
uint64_t addr = ctx->surface_base.addr + pointers[i];
+  struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr);
uint32_t size = strct->dw_length * 4;
  
-  if (addr < bo.addr || addr + size >= bo.addr + bo.size)

- bo = ctx->get_bo(ctx->user_data, addr);
-
-  if (addr < bo.addr || addr + size >= bo.addr + bo.size) {
+  if (pointers[i] % 32 != 0 ||
+  addr < bo.addr || addr + size >= bo.addr + bo.size) {
   fprintf(ctx->fp, "pointer %u: %08x \n", i, pointers[i]);
   continue;
}
@@ -271,18 +266,20 @@ dump_samplers(struct gen_batch_decode_ctx *ctx, uint32_t 
offset, int count)
 if (count < 0)
count = update_count(ctx, offset, strct->dw_length, 4);
  
-   if (ctx->dynamic_base.map == NULL) {

+   uint64_t state_addr = ctx->dynamic_base.addr + offset;
+   struct gen_batch_decode_bo bo = ctx_get_bo(ctx, state_addr);
+   const void *state_map = bo.map;
+
+   if (state_map == NULL) {
fprintf(ctx->fp, "  samplers unavailable\n");
return;
 }
  
-   if (offset % 32 != 0 || offset >= ctx->dynamic_base.size) {

+   if (offset % 32 != 0 || state_addr - bo.addr >= bo.size) {
fprintf(ctx->fp, "  invalid sampler state pointer\n");
return;
 }
  
-   uint64_t state_addr = ctx->dynamic_base.addr + offset;

-   const void *state_map = ctx->dynamic_base.map + offset;
 for (int i = 0; i < count; i++) {
fprintf(ctx->fp, "sampler state %d\n", i);
ctx_print_group(ctx, strct, state_addr, state_map);
@@ -295,9 +292,6 @@ static void
  handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
 const uint32_t *p)
  {
-   if (ctx->dynamic_base.map 

Re: [Mesa-dev] [PATCH 1/2] intel: Make the disassembler take a const pointer to the assembly.

2018-07-17 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 11/07/18 19:25, Kenneth Graunke wrote:

Disassembling doesn't modify the assembly.
---
  src/intel/common/gen_disasm.c | 7 ---
  src/intel/common/gen_disasm.h | 2 +-
  2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/intel/common/gen_disasm.c b/src/intel/common/gen_disasm.c
index 1de20f576d4..4f835c19883 100644
--- a/src/intel/common/gen_disasm.c
+++ b/src/intel/common/gen_disasm.c
@@ -44,14 +44,15 @@ is_send(uint32_t opcode)
  }
  
  static int

-gen_disasm_find_end(struct gen_disasm *disasm, void *assembly, int start)
+gen_disasm_find_end(struct gen_disasm *disasm,
+const void *assembly, int start)
  {
 struct gen_device_info *devinfo = >devinfo;
 int offset = start;
  
 /* This loop exits when send-with-EOT or when opcode is 0 */

 while (true) {
-  brw_inst *insn = assembly + offset;
+  const brw_inst *insn = assembly + offset;
  
if (brw_inst_cmpt_control(devinfo, insn)) {

   offset += 8;
@@ -70,7 +71,7 @@ gen_disasm_find_end(struct gen_disasm *disasm, void 
*assembly, int start)
  }
  
  void

-gen_disasm_disassemble(struct gen_disasm *disasm, void *assembly,
+gen_disasm_disassemble(struct gen_disasm *disasm, const void *assembly,
 int start, FILE *out)
  {
 struct gen_device_info *devinfo = >devinfo;
diff --git a/src/intel/common/gen_disasm.h b/src/intel/common/gen_disasm.h
index c8c18b2cf03..d979114588d 100644
--- a/src/intel/common/gen_disasm.h
+++ b/src/intel/common/gen_disasm.h
@@ -34,7 +34,7 @@ struct gen_disasm;
  
  struct gen_disasm *gen_disasm_create(const struct gen_device_info *devinfo);

  void gen_disasm_disassemble(struct gen_disasm *disasm,
-void *assembly, int start, FILE *out);
+const void *assembly, int start, FILE *out);
  
  void gen_disasm_destroy(struct gen_disasm *disasm);
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: batchbuffer: write correct canonical offset with softpin

2018-07-17 Thread Lionel Landwerlin

On 17/07/18 16:01, Lionel Landwerlin wrote:

Addresses in the command streams should be in canonical form (i.e.
bit[63:48] == bit[47]). If the [bo->gtt_offset, bo->gtt_offset +
target_offset] range contains the address 0x800000000000, the current
code will fail that criterion.

Fixes: 1c9053d0765dc6 ("i965: Prepare batchbuffer module for softpin support.")
Signed-off-by: Lionel Landwerlin 


This was Rb by Ken on IRC.
Waiting for some CI to come back before pushing.

-
Lionel


---
  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index df999ffeb1d..8079ac85ea1 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -922,7 +922,7 @@ emit_reloc(struct intel_batchbuffer *batch,
  
 if (target->kflags & EXEC_OBJECT_PINNED) {

brw_use_pinned_bo(batch, target, reloc_flags & RELOC_WRITE);
-  return target->gtt_offset + target_offset;
+  return gen_canonical_address(target->gtt_offset + target_offset);
 }
  
 unsigned int index = add_exec_bo(batch, target);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: batchbuffer: write correct canonical offset with softpin

2018-07-17 Thread Lionel Landwerlin
Addresses in the command streams should be in canonical form (i.e.
bit[63:48] == bit[47]). If the [bo->gtt_offset, bo->gtt_offset +
target_offset] range contains the address 0x800000000000, the current
code will fail that criterion.

Fixes: 1c9053d0765dc6 ("i965: Prepare batchbuffer module for softpin support.")
Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index df999ffeb1d..8079ac85ea1 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -922,7 +922,7 @@ emit_reloc(struct intel_batchbuffer *batch,
 
if (target->kflags & EXEC_OBJECT_PINNED) {
   brw_use_pinned_bo(batch, target, reloc_flags & RELOC_WRITE);
-  return target->gtt_offset + target_offset;
+  return gen_canonical_address(target->gtt_offset + target_offset);
}
 
unsigned int index = add_exec_bo(batch, target);
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: fix assert in anv_CmdBindDescriptorSets()

2018-07-17 Thread Lionel Landwerlin

On 17/07/18 10:48, Samuel Iglesias Gonsálvez wrote:

The assert is checking that we are not binding more descriptor sets
than are supported by the driver. When binding the descriptor set
number MAX_SETS-1, it was breaking the assert because
descriptorSetCount = 1.

Signed-off-by: Samuel Iglesias Gonsálvez 


Oops :(
This goes back so far, we can't really pin a particular stable version.

Cc: 
Reviewed-by: Lionel Landwerlin 


---
  src/intel/vulkan/anv_cmd_buffer.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 201b73ad45a..8ef71b0ed9c 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -613,7 +613,7 @@ void anv_CmdBindDescriptorSets(
 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
  
-   assert(firstSet + descriptorSetCount < MAX_SETS);

+   assert(firstSet + descriptorSetCount <= MAX_SETS);
  
 for (uint32_t i = 0; i < descriptorSetCount; i++) {

ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 7/7] i965: enable INTEL_blackhole_render

2018-07-10 Thread Lionel Landwerlin

On 10/07/18 15:04, Daniel Vetter wrote:

On Wed, Jun 20, 2018 at 06:25:34PM +0100, Lionel Landwerlin wrote:

v2: condition the extension on context isolation support from the
 kernel (Chris)

v3: (Lionel)

 The initial version of this change used a feature of the Gen7+
 command parser to turn the primitive instructions into no-ops.
 Unfortunately this doesn't play well with how we're using the
 hardware outside of the user submitted commands. For example
 resolves are implicit operations which should not be turned into
 no-ops, as parts of the previously submitted commands (before
 blackhole_render is enabled) might not be disabled. For example
 this sequence :

glClear();
glEnable(GL_BLACKHOLE_RENDER_INTEL);
glDrawArrays(...);
glReadPixels(...);
glDisable(GL_BLACKHOLE_RENDER_INTEL);

 While clear has been emitted outside the blackhole render, it
 should still be resolved properly in the read pixels. Hence we
 need to be more selective and only disable user submitted
 commands.

 This v3 manually turns primitives into MI_NOOP if blackhole render
 is enabled. This lets us enable this feature on any platform.

v4: Limit support to gen7.5+ (Lionel)

v5: Enable Gen7.5 support again, requires a kernel update of the
 command parser (Lionel)

v6: Disable Gen7.5 again... Kernel devs want these patches landed
 before they accept the kernel patches to whitelist INSTPM (Lionel)

Hm, this doesn't quite read how kernel patches are usually handled:
Ordering sequence is:
1. get everything reviewed and tested (both kernel and userspace), but do
not yet start merging
2. merge kernel (if you feel paranoid, wait until Dave Airlie accepted it
into drm-next)
3. merge userspace

Insisting that the userspace stuff lands before the kernel (even if it's
just prep work) is kinda the wrong way round, and needlessly complicates
the process.

This is all documented in full details in

https://dri.freedesktop.org/docs/drm/gpu/drm-uapi.html#open-source-userspace-requirements

Cheers, Daniel


Hey Daniel,

I remember somebody using the word "land" on IRC but it could have been 
a mistake :)
This is also to avoid committing code that might end up being wrong if 
somebody bumps the command parser version before my patches.


Thanks for the reminder,

-
Lionel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] intel/aubinator_error_decode: Allow for more sections

2018-07-09 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 10/07/18 00:01, Jason Ekstrand wrote:

Error states coming from actual Vulkan applications tend to have fairly
long command buffers and lots of chained batches.  30 total BOs isn't
nearly enough.  This commit bumps it to 256, makes some things use the
actual number of sections instead of the #define, and adds asserts if we
ever go over 256 sections.
---
  src/intel/tools/aubinator_error_decode.c | 24 +---
  1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/intel/tools/aubinator_error_decode.c 
b/src/intel/tools/aubinator_error_decode.c
index 2735bd72714..4e3359bba9f 100644
--- a/src/intel/tools/aubinator_error_decode.c
+++ b/src/intel/tools/aubinator_error_decode.c
@@ -295,7 +295,8 @@ struct section {
 int count;
  };
  
-#define MAX_SECTIONS 30

+#define MAX_SECTIONS 256
+static unsigned num_sections;
  static struct section sections[MAX_SECTIONS];
  
  static int zlib_inflate(uint32_t **ptr, int len)

@@ -386,7 +387,7 @@ static int ascii85_decode(const char *in, uint32_t **out, 
bool inflate)
  static struct gen_batch_decode_bo
  get_gen_batch_bo(void *user_data, uint64_t address)
  {
-   for (int s = 0; s < MAX_SECTIONS; s++) {
+   for (int s = 0; s < num_sections; s++) {
if (sections[s].gtt_offset <= address &&
address < sections[s].gtt_offset + sections[s].count * 4) {
   return (struct gen_batch_decode_bo) {
@@ -411,7 +412,6 @@ read_data_file(FILE *file)
 uint32_t offset, value;
 char *ring_name = NULL;
 struct gen_device_info devinfo;
-   int sect_num = 0;
  
 while (getline(, _size, file) > 0) {

char *new_ring_name = NULL;
@@ -429,9 +429,10 @@ read_data_file(FILE *file)
  fprintf(stderr, "ASCII85 decode failed.\n");
  exit(EXIT_FAILURE);
   }
- sections[sect_num].data = data;
- sections[sect_num].count = count;
- sect_num++;
+ assert(num_sections < MAX_SECTIONS);
+ sections[num_sections].data = data;
+ sections[num_sections].count = count;
+ num_sections++;
   continue;
}
  
@@ -465,13 +466,14 @@ read_data_file(FILE *file)

 break;
   }
  
- sections[sect_num].buffer_name = b->name;

- sections[sect_num].ring_name = strdup(ring_name);
+ assert(num_sections < MAX_SECTIONS);
+ sections[num_sections].buffer_name = b->name;
+ sections[num_sections].ring_name = strdup(ring_name);
  
   uint32_t hi, lo;

   dashes = strchr(dashes, '=');
   if (dashes && sscanf(dashes, "= 0x%08x %08x\n", , ))
-sections[sect_num].gtt_offset = ((uint64_t) hi) << 32 | lo;
+sections[num_sections].gtt_offset = ((uint64_t) hi) << 32 | lo;
  
   continue;

}
@@ -598,7 +600,7 @@ read_data_file(FILE *file)
   xml_path, get_gen_batch_bo, NULL, NULL);
  
  
-   for (int s = 0; s < sect_num; s++) {

+   for (int s = 0; s < num_sections; s++) {
printf("--- %s (%s) at 0x%08x %08x\n",
   sections[s].buffer_name, sections[s].ring_name,
   (unsigned) (sections[s].gtt_offset >> 32),
@@ -615,7 +617,7 @@ read_data_file(FILE *file)
  
 gen_batch_decode_ctx_finish(_ctx);
  
-   for (int s = 0; s < sect_num; s++) {

+   for (int s = 0; s < num_sections; s++) {
free(sections[s].ring_name);
free(sections[s].data);
 }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] intel/batch_decoder: Recurse for all 2nd level batches

2018-07-09 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 10/07/18 00:01, Jason Ekstrand wrote:

Our attempt to restart the loop with the second level batch worked at
one point but got broken at some point.  It was too fragile anyway and
we're not likely to have enough secondaries to actually overflow the
stack so we may as well recurse in both cases.
---
  src/intel/common/gen_batch_decoder.c | 19 +--
  1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index fc0ff95a476..fe7536da9ec 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -856,33 +856,24 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
   if (next_batch.map == NULL) {
  fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
  next_batch.addr);
+ } else {
+gen_print_batch(ctx, next_batch.map, next_batch.size,
+next_batch.addr);
   }
-
   if (second_level) {
  /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
   * like a subroutine call.  Commands that come afterwards get
   * processed once the 2nd level batch buffer returns with
   * MI_BATCH_BUFFER_END.
   */
-if (next_batch.map) {
-   gen_print_batch(ctx, next_batch.map, next_batch.size,
-   next_batch.addr);
-}
+continue;
   } else {
  /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
   * like a goto.  Nothing after it will ever get processed.  In
   * order to prevent the recursion from growing, we just reset the
   * loop and continue;
   */
-if (next_batch.map) {
-   p = next_batch.map;
-   end = next_batch.map + next_batch.size;
-   length = 0;
-   continue;
-} else {
-   /* Nothing we can do */
-   break;
-}
+break;
   }
} else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) {
   break;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/tools/dump_gpu: Add option to print ppgtt mappings.

2018-07-09 Thread Lionel Landwerlin

Thanks!

Reviewed-by: Lionel Landwerlin 

On 09/07/18 21:28, Rafael Antognolli wrote:

Using -vv will increase the verbosity, by printing the ppgtt mappings as
they get written into the aub file.

Cc: Lionel Landwerlin 
---
  src/intel/tools/intel_dump_gpu.c  | 25 -
  src/intel/tools/intel_dump_gpu.in |  6 ++
  2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index c909d63d88f..1201fa35ae0 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -38,6 +38,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "intel_aub.h"
  
@@ -389,6 +390,11 @@ populate_ppgtt_table(struct ppgtt_table *table, int start, int end,

 uint64_t entries[512] = {0};
 int dirty_start = 512, dirty_end = 0;
  
+   if (verbose == 2) {

+  printf("  PPGTT (0x%016" PRIx64 "), lvl %d, start: %x, end: %x\n",
+ table->phys_addr, level, start, end);
+   }
+
 for (int i = start; i <= end; i++) {
if (!table->subtables[i]) {
   dirty_start = min(dirty_start, i);
@@ -396,11 +402,19 @@ populate_ppgtt_table(struct ppgtt_table *table, int 
start, int end,
   if (level == 1) {
  table->subtables[i] =
 (void *)(phys_addrs_allocator++ << 12);
+if (verbose == 2) {
+   printf("   Adding entry: %x, phys_addr: 0x%016" PRIx64 "\n",
+  i, (uint64_t)table->subtables[i]);
+}
   } else {
  table->subtables[i] =
 calloc(1, sizeof(struct ppgtt_table));
  table->subtables[i]->phys_addr =
 phys_addrs_allocator++ << 12;
+if (verbose == 2) {
+   printf("   Adding entry: %x, phys_addr: 0x%016" PRIx64 "\n",
+  i, table->subtables[i]->phys_addr);
+}
   }
}
entries[i] = 3 /* read/write | present */ |
@@ -434,6 +448,11 @@ map_ppgtt(uint64_t start, uint64_t size)
  #define L2_table(addr) (L3_table(addr)->subtables[L3_index(addr)])
  #define L1_table(addr) (L2_table(addr)->subtables[L2_index(addr)])
  
+   if (verbose == 2) {

+  printf(" Mapping PPGTT address: 0x%" PRIx64 ", size: %" PRIu64"\n",
+ start, size);
+   }
+
 populate_ppgtt_table(, L4_index(l4_start), L4_index(l4_end), 4);
  
 for (uint64_t l4 = l4_start; l4 < l4_end; l4 += (1ULL << 39)) {

@@ -1072,7 +1091,11 @@ maybe_init(void)
 config = fdopen(3, "r");
 while (fscanf(config, "%m[^=]=%m[^\n]\n", , ) != EOF) {
if (!strcmp(key, "verbose")) {
- verbose = 1;
+ if (!strcmp(value, "1")) {
+verbose = 1;
+ } else if (!strcmp(value, "2")) {
+verbose = 2;
+ }
} else if (!strcmp(key, "device")) {
   fail_if(sscanf(value, "%i", ) != 1,
   "intel_aubdump: failed to parse device id '%s'",
diff --git a/src/intel/tools/intel_dump_gpu.in 
b/src/intel/tools/intel_dump_gpu.in
index 875a67e7682..b9887f0ed2e 100755
--- a/src/intel/tools/intel_dump_gpu.in
+++ b/src/intel/tools/intel_dump_gpu.in
@@ -17,6 +17,8 @@ contents and execution of the GEM application.
  
-v Enable verbose output
  
+  -vvEnable extra verbosity - dumps gtt mappings

+
--help Display this help message and exit
  
  EOF

@@ -55,6 +57,10 @@ while true; do
  add_arg "verbose=1"
  shift 1
  ;;
+-vv)
+add_arg "verbose=2"
+shift 1
+;;
  -o*)
  file=${1##-o}
  add_arg "file=${file:-$(basename ${file}).aub}"



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: remove unused function

2018-07-09 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 09/07/18 10:49, Iago Toral Quiroga wrote:

---
  src/intel/compiler/brw_fs.cpp | 27 ---
  src/intel/compiler/brw_fs.h   |  4 
  2 files changed, 31 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 38a8621f2c..99b21f6d89 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6444,33 +6444,6 @@ fs_visitor::dump_instruction(backend_instruction 
*be_inst, FILE *file)
 fprintf(file, "\n");
  }
  
-/**

- * Possibly returns an instruction that set up @param reg.
- *
- * Sometimes we want to take the result of some expression/variable
- * dereference tree and rewrite the instruction generating the result
- * of the tree.  When processing the tree, we know that the
- * instructions generated are all writing temporaries that are dead
- * outside of this tree.  So, if we have some instructions that write
- * a temporary, we're free to point that temp write somewhere else.
- *
- * Note that this doesn't guarantee that the instruction generated
- * only reg -- it might be the size=4 destination of a texture instruction.
- */
-fs_inst *
-fs_visitor::get_instruction_generating_reg(fs_inst *start,
-  fs_inst *end,
-  const fs_reg )
-{
-   if (end == start ||
-   end->is_partial_write() ||
-   !reg.equals(end->dst)) {
-  return NULL;
-   } else {
-  return end;
-   }
-}
-
  void
  fs_visitor::setup_fs_payload_gen6()
  {
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 25c433e44f..c09f0ccdd3 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -83,10 +83,6 @@ public:
 void setup_uniform_clipplane_values();
 void compute_clip_distance();
  
-   fs_inst *get_instruction_generating_reg(fs_inst *start,

-  fs_inst *end,
-  const fs_reg );
-
 void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder ,
 const fs_reg ,
 const fs_reg _index,



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] configure.ac: Check for xcb-randr version and xrandr to enable leasing

2018-07-09 Thread Lionel Landwerlin

Hey Danylo,

I didn't really understand why your change didn't just include a >= 1.13 
in the


if test x"$have_xlease" = xyes; then

I guess just adding the revision there (and configure.ac) and bumping 
the revision in meson.build should be enough.


Thanks,

-
Lionel

On 09/07/18 10:16, Danylo Piliaiev wrote:

Hi Lionel,

Yes, meson probably also has the issue since it checks for xcb-randr 
1.12 and not 1.13. I didn't send further patches since the vulkan build 
got broken once more, see my reply to "[PATCH mesa 2/4] vulkan: add 
VK_EXT_display_control [v8]". Also, I have upgraded to Ubuntu 18.04 and 
am currently not affected by this. I'm not sure how to proceed further 
with the issue.


- Danil


On 06.07.18 22:30, Lionel Landwerlin wrote:

Hi Danylo,

I just ran into the same issue, thanks for fixing this with autotools.
I think the same issue remains with meson though.

Thanks,

-
Lionel

On 20/06/18 14:25, Danylo Piliaiev wrote:

VK_USE_PLATFORM_XLIB_XRANDR_EXT requires xlib leasing which requires
xcb-randr >= 1.13. Also xrandr header is required for this extension.
The extension should not be automatically enabled if these dependencies
aren't satisfied.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106976

Signed-off-by: Danylo Piliaiev 
---
  configure.ac | 23 ++-
  1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/configure.ac b/configure.ac
index 0652410..ab59e06 100644
--- a/configure.ac
+++ b/configure.ac
@@ -97,6 +97,7 @@ XCBDRI2_REQUIRED=1.8
  XCBDRI3_MODIFIERS_REQUIRED=1.13
  XCBGLX_REQUIRED=1.8.1
  XCBPRESENT_MODIFIERS_REQUIRED=1.13
+XCBRANDR_XLEASE_REQUIRED=1.13
  XDAMAGE_REQUIRED=1.1
  XSHMFENCE_REQUIRED=1.1
  XVMC_REQUIRED=1.0.6
@@ -1867,18 +1868,6 @@ if test x"$enable_dri3" = xyes; then
  fi
  fi
  -
-if echo "$platforms" | grep -q 'x11' && echo "$platforms" | grep -q 
'drm'; then

-    have_xlease=yes
-else
-    have_xlease=no
-fi
-
-if test x"$have_xlease" = xyes; then
-    randr_modules="x11-xcb xcb-randr"
-    PKG_CHECK_MODULES([XCB_RANDR], [$randr_modules])
-fi
-
  AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$platforms" | grep -q 'x11')
  AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$platforms" | grep -q 
'wayland')

  AM_CONDITIONAL(HAVE_PLATFORM_DRM, echo "$platforms" | grep -q 'drm')
@@ -1896,12 +1885,20 @@ xno)
  ;;
  *)
  if echo "$platforms" | grep -q 'x11' && echo "$platforms" | 
grep -q 'drm'; then

-    enable_xlib_lease=yes
+    xlease_modules="x11-xcb xcb-randr >= 
$XCBRANDR_XLEASE_REQUIRED xrandr"
+    PKG_CHECK_EXISTS([$xlease_modules], 
[enable_xlib_lease=yes], [enable_xlib_lease=no])

  else
  enable_xlib_lease=no
  fi
  esac
  +if test x"$enable_xlib_lease" = xyes; then
+    randr_modules="x11-xcb xcb-randr >= $XCBRANDR_XLEASE_REQUIRED"
+    PKG_CHECK_MODULES([XCB_RANDR], [$randr_modules])
+    xlib_randr_modules="xrandr"
+    PKG_CHECK_MODULES([XLIB_RANDR], [$xlib_randr_modules])
+fi
+
  AM_CONDITIONAL(HAVE_XLIB_LEASE, test "x$enable_xlib_lease" = xyes)
    dnl





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] anv/pass: Use a designated initailizer for attachments

2018-07-08 Thread Lionel Landwerlin

With the version bumped in patch 7, patches 2-7 are :

Reviewed-by: Lionel Landwerlin 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] vulkan: Update the XML and headers to 1.1.80

2018-07-08 Thread Lionel Landwerlin

Acked-by: Lionel Landwerlin 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] anv: Add support for VK_KHR_create_renderpass2

2018-07-08 Thread Lionel Landwerlin

On 07/07/18 17:29, Jason Ekstrand wrote:

The implementation of CreateRenderPass2 uses the helpers we broke out in
previous commits.  The implementations of the new vkCmd functions just
call the old versions.
---
  src/intel/vulkan/anv_extensions.py |   1 +
  src/intel/vulkan/anv_pass.c| 140 +
  src/intel/vulkan/genX_cmd_buffer.c |  24 +
  3 files changed, 165 insertions(+)

diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index 0f99f58ecb1..4179315a388 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -73,6 +73,7 @@ EXTENSIONS = [


You might want to bump API_PATCH_VERSION above.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: fix clear color bo address relocation

2018-07-07 Thread Lionel Landwerlin
Fixes: 7987d041fda0c9 ("i965/surface_state: Emit the clear color address 
instead of value.")
Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 73cae9ef7c5..9397b637c79 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -220,7 +220,7 @@ brw_emit_surface_state(struct brw_context *brw,
if (clear_bo != NULL) {
   /* Make sure the offset is aligned with a cacheline. */
   assert((clear_offset & 0x3f) == 0);
-  uint32_t *clear_address =
+  uint64_t *clear_address =
 state + brw->isl_dev.ss.clear_color_state_offset;
   *clear_address = brw_state_reloc(>batch,
*surf_offset +
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] vulkan: utils: handle hexadecimal values in registry

2018-07-06 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/vulkan/util/gen_enum_to_str.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/vulkan/util/gen_enum_to_str.py 
b/src/vulkan/util/gen_enum_to_str.py
index bf883d5cb8f..fb9ecd65c6d 100644
--- a/src/vulkan/util/gen_enum_to_str.py
+++ b/src/vulkan/util/gen_enum_to_str.py
@@ -172,7 +172,7 @@ class VkEnum(object):
 def add_value_from_xml(self, elem, extension=None):
 if 'value' in elem.attrib:
 self.add_value(elem.attrib['name'],
-   value=int(elem.attrib['value']))
+   value=int(elem.attrib['value'], base=0))
 elif 'alias' in elem.attrib:
 self.add_value(elem.attrib['name'],
value=self.name_to_value[elem.attrib['alias']])
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] configure.ac: Check for xcb-randr version and xrandr to enable leasing

2018-07-06 Thread Lionel Landwerlin

Hi Danylo,

I just ran into the same issue, thanks for fixing this with autotools.
I think the same issue remains with meson though.

Thanks,

-
Lionel

On 20/06/18 14:25, Danylo Piliaiev wrote:

VK_USE_PLATFORM_XLIB_XRANDR_EXT requires xlib leasing which requires
xcb-randr >= 1.13. Also xrandr header is required for this extension.
The extension should not be automatically enabled if these dependencies
aren't satisfied.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106976

Signed-off-by: Danylo Piliaiev 
---
  configure.ac | 23 ++-
  1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/configure.ac b/configure.ac
index 0652410..ab59e06 100644
--- a/configure.ac
+++ b/configure.ac
@@ -97,6 +97,7 @@ XCBDRI2_REQUIRED=1.8
  XCBDRI3_MODIFIERS_REQUIRED=1.13
  XCBGLX_REQUIRED=1.8.1
  XCBPRESENT_MODIFIERS_REQUIRED=1.13
+XCBRANDR_XLEASE_REQUIRED=1.13
  XDAMAGE_REQUIRED=1.1
  XSHMFENCE_REQUIRED=1.1
  XVMC_REQUIRED=1.0.6
@@ -1867,18 +1868,6 @@ if test x"$enable_dri3" = xyes; then
  fi
  fi
  
-

-if echo "$platforms" | grep -q 'x11' && echo "$platforms" | grep -q 'drm'; then
-have_xlease=yes
-else
-have_xlease=no
-fi
-
-if test x"$have_xlease" = xyes; then
-randr_modules="x11-xcb xcb-randr"
-PKG_CHECK_MODULES([XCB_RANDR], [$randr_modules])
-fi
-
  AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$platforms" | grep -q 'x11')
  AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$platforms" | grep -q 'wayland')
  AM_CONDITIONAL(HAVE_PLATFORM_DRM, echo "$platforms" | grep -q 'drm')
@@ -1896,12 +1885,20 @@ xno)
  ;;
  *)
  if echo "$platforms" | grep -q 'x11' && echo "$platforms" | grep -q 
'drm'; then
-enable_xlib_lease=yes
+xlease_modules="x11-xcb xcb-randr >= $XCBRANDR_XLEASE_REQUIRED xrandr"
+PKG_CHECK_EXISTS([$xlease_modules], [enable_xlib_lease=yes], 
[enable_xlib_lease=no])
  else
  enable_xlib_lease=no
  fi
  esac
  
+if test x"$enable_xlib_lease" = xyes; then

+randr_modules="x11-xcb xcb-randr >= $XCBRANDR_XLEASE_REQUIRED"
+PKG_CHECK_MODULES([XCB_RANDR], [$randr_modules])
+xlib_randr_modules="xrandr"
+PKG_CHECK_MODULES([XLIB_RANDR], [$xlib_randr_modules])
+fi
+
  AM_CONDITIONAL(HAVE_XLIB_LEASE, test "x$enable_xlib_lease" = xyes)
  
  dnl



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/pipeline: honor the pipeline_cache_enabled run-time flag

2018-07-06 Thread Lionel Landwerlin

On 04/07/18 09:44, Iago Toral Quiroga wrote:

---
  src/intel/vulkan/anv_pipeline_cache.c | 37 +++
  1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline_cache.c 
b/src/intel/vulkan/anv_pipeline_cache.c
index d4c7262dc0..5825bf9f01 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -570,23 +570,26 @@ anv_device_search_for_kernel(struct anv_device *device,
 }
  


I guess you could do :

if (disk_cache && device->instance->pipeline_cache_enabled) {
...

to minimize the diff.

Do we still want to save stuff in the cache (anv_device_upload_kernel) 
when cache is disabled?


Regardless :

Reviewed-by: Lionel Landwerlin 

Thanks!


  #ifdef ENABLE_SHADER_CACHE
-   struct disk_cache *disk_cache = device->instance->physicalDevice.disk_cache;
-   if (disk_cache) {
-  cache_key cache_key;
-  disk_cache_compute_key(disk_cache, key_data, key_size, cache_key);
-
-  size_t buffer_size;
-  uint8_t *buffer = disk_cache_get(disk_cache, cache_key, _size);
-  if (buffer) {
- struct blob_reader blob;
- blob_reader_init(, buffer, buffer_size);
- bin = anv_shader_bin_create_from_blob(device, );
- free(buffer);
-
- if (bin) {
-if (cache)
-   anv_pipeline_cache_add_shader_bin(cache, bin);
-return bin;
+   if (device->instance->pipeline_cache_enabled) {
+  struct disk_cache *disk_cache =
+ device->instance->physicalDevice.disk_cache;
+  if (disk_cache) {
+ cache_key cache_key;
+ disk_cache_compute_key(disk_cache, key_data, key_size, cache_key);
+
+ size_t buffer_size;
+ uint8_t *buffer = disk_cache_get(disk_cache, cache_key, _size);
+ if (buffer) {
+struct blob_reader blob;
+blob_reader_init(, buffer, buffer_size);
+bin = anv_shader_bin_create_from_blob(device, );
+free(buffer);
+
+if (bin) {
+   if (cache)
+  anv_pipeline_cache_add_shader_bin(cache, bin);
+   return bin;
+}
   }
}
 }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: don't check ccs_e support if isl_format is ISL_FORMAT_UNSUPPORTED

2018-07-06 Thread Lionel Landwerlin

Hi Dongwon,

Jason & I merged some patches to fix similar issues a few weeks ago.
I think we didn't change this function because a crash or hitting an 
assert is a good indication that something's gone wrong before we run 
into this function.


If your patch fixes an issue, could you give some detail about it?
Maybe a gdb backtrace?

Thanks,

-
Lionel

On 05/07/18 19:27, Dongwon Kim wrote:

'ISL_FORMAT_UNSUPPORTED' shouldn't be passed down for evaluation as it is
strictly prohibited in isl code (e.g. format_info_exists).

Signed-off-by: Dongwon Kim 
---
  src/mesa/drivers/dri/i965/intel_screen.c | 12 ++--
  1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index cb357419a7..a65042da72 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -346,8 +346,16 @@ modifier_is_supported(const struct gen_device_info 
*devinfo,
 */
format = _mesa_format_fallback_rgbx_to_rgba(format);
format = _mesa_get_srgb_format_linear(format);
-  if (!isl_format_supports_ccs_e(devinfo,
- brw_isl_format_for_mesa_format(format)))
+
+  enum isl_format isl_format;
+  isl_format = brw_isl_format_for_mesa_format(format);
+
+  /* whether there is supported ISL format for given mesa format */
+  if (isl_format == ISL_FORMAT_UNSUPPORTED)
+ return false;
+
+  /* check if isl_fomat supports ccs_e */
+  if (!isl_format_supports_ccs_e(devinfo, isl_format))
   return false;
 }
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel: tools: dump_gpu: fix ppgtt mapping

2018-07-06 Thread Lionel Landwerlin
We were not properly writing page tables when the virtual address
range spans multiple subtrees of the tables.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c | 46 
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 8a7dd52e746..c909d63d88f 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -423,13 +423,7 @@ static void
 map_ppgtt(uint64_t start, uint64_t size)
 {
uint64_t l4_start = start & 0xff80;
-   uint64_t l3_start = start & 0xc000;
-   uint64_t l2_start = start & 0xffe0;
-   uint64_t l1_start = start & 0xf000;
uint64_t l4_end = ((start + size - 1) | 0x007f) & 0x;
-   uint64_t l3_end = ((start + size - 1) | 0x3fff) & 0x;
-   uint64_t l2_end = ((start + size - 1) | 0x001f) & 0x;
-   uint64_t l1_end = ((start + size - 1) | 0x0fff) & 0x;
 
 #define L4_index(addr) (((addr) >> 39) & 0x1ff)
 #define L3_index(addr) (((addr) >> 30) & 0x1ff)
@@ -442,28 +436,34 @@ map_ppgtt(uint64_t start, uint64_t size)
 
populate_ppgtt_table(, L4_index(l4_start), L4_index(l4_end), 4);
 
-   for (uint64_t a = l4_start; a < l4_end; a += (1ULL << 39)) {
-  uint64_t _start = max(a, l3_start);
-  uint64_t _end = min(a + (1ULL << 39), l3_end);
+   for (uint64_t l4 = l4_start; l4 < l4_end; l4 += (1ULL << 39)) {
+  uint64_t l3_start = max(l4, start & 0xc000);
+  uint64_t l3_end = min(l4 + (1ULL << 39),
+((start + size - 1) | 0x3fff) & 
0x);
+  uint64_t l3_start_idx = L3_index(l3_start);
+  uint64_t l3_end_idx = L3_index(l3_start) >= l3_start_idx ? 
L3_index(l3_end) : 0x1ff;
 
-  populate_ppgtt_table(L3_table(a), L3_index(_start),
-   L3_index(_end), 3);
-   }
+  populate_ppgtt_table(L3_table(l4), l3_start_idx, l3_end_idx, 3);
 
-   for (uint64_t a = l3_start; a < l3_end; a += (1ULL << 30)) {
-  uint64_t _start = max(a, l2_start);
-  uint64_t _end = min(a + (1ULL << 30), l2_end);
+  for (uint64_t l3 = l3_start; l3 < l3_end; l3 += (1ULL << 30)) {
+ uint64_t l2_start = max(l3, start & 0xffe0);
+ uint64_t l2_end = min(l3 + (1ULL << 30),
+   ((start + size - 1) | 0x001f) & 
0x);
+ uint64_t l2_start_idx = L2_index(l2_start);
+ uint64_t l2_end_idx = L2_index(l2_end) >= l2_start_idx ? 
L2_index(l2_end) : 0x1ff;
 
-  populate_ppgtt_table(L2_table(a), L2_index(_start),
-   L2_index(_end), 2);
-   }
+ populate_ppgtt_table(L2_table(l3), l2_start_idx, l2_end_idx, 2);
 
-   for (uint64_t a = l2_start; a < l2_end; a += (1ULL << 21)) {
-  uint64_t _start = max(a, l1_start);
-  uint64_t _end = min(a + (1ULL << 21), l1_end);
+ for (uint64_t l2 = l2_start; l2 < l2_end; l2 += (1ULL << 21)) {
+uint64_t l1_start = max(l2, start & 0xf000);
+uint64_t l1_end = min(l2 + (1ULL << 21),
+  ((start + size - 1) | 0x0fff) & 
0x);
+uint64_t l1_start_idx = L1_index(l1_start);
+uint64_t l1_end_idx = L1_index(l1_end) >= l1_start_idx ? 
L1_index(l1_end) : 0x1ff;
 
-  populate_ppgtt_table(L1_table(a), L1_index(_start),
-   L1_index(_end), 1);
+populate_ppgtt_table(L1_table(l2), l1_start_idx, l1_end_idx, 1);
+ }
+  }
}
 }
 
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] intel: tools: fix build on old systems

2018-07-06 Thread Lionel Landwerlin
Older systems might not have support for memfd_create at the kernel
level. There we won't be able to use aubinator.

We also initially tried to work around some libc having the
memfd_create syscall number defined, but not the memfd_create()
function.

This change makes dealing with the 2 problems above simpler by
creating our own syscall wrapper regardless. Aubinator won't be
compiled if the syscall number isn't defined.

v2: Simplify dealing with memfd_create (Matt)
Print error message if memfd_create syscall number not found (Eric)

Signed-off-by: Lionel Landwerlin 
---
 configure.ac | 13 +++--
 meson.build  |  2 +-
 src/intel/Makefile.tools.am  |  6 +-
 src/intel/tools/aubinator.c  |  9 +++--
 src/intel/tools/meson.build  | 30 --
 src/intel/vulkan/anv_allocator.c |  6 ++
 6 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/configure.ac b/configure.ac
index f135d057365..f2c1bd1cd8e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -875,7 +875,16 @@ AC_CHECK_HEADERS([endian.h])
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
 AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
 AC_CHECK_FUNC([timespec_get], [DEFINES="$DEFINES -DHAVE_TIMESPEC_GET"])
-AC_CHECK_FUNC([memfd_create], [DEFINES="$DEFINES -DHAVE_MEMFD_CREATE"])
+AC_CHECK_FUNC([memfd_create], [MEMFD_CREATE=yes], [MEMFD_CREATE=no])
+
+AC_COMPILE_IFELSE(
+[AC_LANG_PROGRAM([], [[
+#include 
+int main() { return SYS_memfd_create; }
+]])],
+have_memfd_nr=yes; AC_MSG_RESULT(yes),
+AC_MSG_RESULT(no))
+AM_CONDITIONAL(HAVE_MEMFD_CREATE, test "x$have_memfd_nr" = xyes)
 
 AC_MSG_CHECKING([whether strtod has locale support])
 AC_LINK_IFELSE([AC_LANG_SOURCE([[
@@ -2900,7 +2909,7 @@ if test "x$enable_llvm" = xyes; then
 fi
 
 dnl The gallium-xlib GLX and gallium OSMesa targets directly embed the
-dnl swr/llvmpipe driver into the final binary.  Adding LLVM_LIBS results 
in 
+dnl swr/llvmpipe driver into the final binary.  Adding LLVM_LIBS results in
 dnl the LLVM library propagated in the Libs.private of the respective .pc
 dnl file which ensures complete dependency information when statically
 dnl linking.
diff --git a/meson.build b/meson.build
index 7d12af3d510..73a004b77a1 100644
--- a/meson.build
+++ b/meson.build
@@ -960,7 +960,7 @@ elif cc.has_header_symbol('sys/mkdev.h', 'major')
   pre_args += '-DMAJOR_IN_MKDEV'
 endif
 
-foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h']
+foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h', 
'sys/memfd.h']
   if cc.compiles('#include <@0@>'.format(h), name : '@0@'.format(h))
 pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify())
   endif
diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index b00cc8cc2cb..16cc1095f62 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -19,8 +19,12 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+if HAVE_MEMFD_CREATE
+noinst_PROGRAMS += \
+   tools/aubinator
+endif
+
 noinst_PROGRAMS += \
-   tools/aubinator \
tools/aubinator_error_decode
 
 tools_aubinator_SOURCES = \
diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 8989d558b66..a7b8697960a 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "util/list.h"
 #include "util/macros.h"
@@ -46,15 +47,11 @@
 #include "common/gen_gem.h"
 #include "intel_aub.h"
 
-#ifndef HAVE_MEMFD_CREATE
-#include 
-
 static inline int
-memfd_create(const char *name, unsigned int flags)
+local_memfd_create(const char *name, unsigned int flags)
 {
return syscall(SYS_memfd_create, name, flags);
 }
-#endif
 
 /* Below is the only command missing from intel_aub.h in libdrm
  * So, reuse intel_aub.h from libdrm and #define the
@@ -907,7 +904,7 @@ int main(int argc, char *argv[])
if (isatty(1) && pager)
   setup_pager();
 
-   mem_fd = memfd_create("phys memory", 0);
+   mem_fd = local_memfd_create("phys memory", 0);
 
list_inithead();
 
diff --git a/src/intel/tools/meson.build b/src/intel/tools/meson.build
index 705a353f26a..bf1db7c4542 100644
--- a/src/intel/tools/meson.build
+++ b/src/intel/tools/meson.build
@@ -18,16 +18,26 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-aubinator = executable(
-  'aubinator',
-  files('aubinator.c', 'intel_aub.h'),
-  dependencies : [dep_expat, dep_zlib, dep_dl, dep_thread, dep_m],
-  include_directories : [inc_common, inc_intel],
-  link_with : [libintel_common, libintel_compiler, libintel_dev, libmesa_util],
-  c_args : [c_vis_args, no_override_init_args],
-  buil

[Mesa-dev] [PATCH] intel: tools: fix build on old systems

2018-07-05 Thread Lionel Landwerlin
Older systems might not have support for memfd_create at the kernel
level. There we won't be able to use aubinator.

We also initially tried to work around some libc having the
memfd_create syscall number defined, but not the memfd_create()
function.

This change fixes the broken build on the travis CI by only compiling
aubinator if memfd_create() is available as part of the libc.
Annoyingly the man page says we should include <sys/memfd.h> but
that header doesn't exist on my system and memfd_create() is instead
defined in bits/mman-shared.h. Hence the new checks...

Signed-off-by: Lionel Landwerlin 
---
 configure.ac|  8 +++-
 meson.build |  2 +-
 src/intel/Makefile.tools.am |  6 +-
 src/intel/tools/aubinator.c | 13 +++--
 src/intel/tools/meson.build | 33 +++--
 5 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/configure.ac b/configure.ac
index f135d057365..939585411f9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -872,10 +872,16 @@ AC_HEADER_MAJOR
 AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
 AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
 AC_CHECK_HEADERS([endian.h])
+AC_CHECK_HEADERS([sys/memfd.h], [DEFINES="$DEFINES -DHAVE_SYS_MEMFD_H")
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
 AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
 AC_CHECK_FUNC([timespec_get], [DEFINES="$DEFINES -DHAVE_TIMESPEC_GET"])
-AC_CHECK_FUNC([memfd_create], [DEFINES="$DEFINES -DHAVE_MEMFD_CREATE"])
+AC_CHECK_FUNC([memfd_create], [MEMFD_CREATE=yes], [MEMFD_CREATE=no])
+
+AM_CONDITIONAL(HAVE_MEMFD_CREATE, test "x$MEMFD_CREATE" = xyes)
+if test "x$MEMFD_CREATE" = xyes; then
+   DEFINES="$DEFINES -DHAVE_MEMFD_CREATE"
+fi
 
 AC_MSG_CHECKING([whether strtod has locale support])
 AC_LINK_IFELSE([AC_LANG_SOURCE([[
diff --git a/meson.build b/meson.build
index b2722c71e5b..89f17128c03 100644
--- a/meson.build
+++ b/meson.build
@@ -956,7 +956,7 @@ elif cc.has_header_symbol('sys/mkdev.h', 'major')
   pre_args += '-DMAJOR_IN_MKDEV'
 endif
 
-foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h']
+foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h', 
'sys/memfd.h']
   if cc.compiles('#include <@0@>'.format(h), name : '@0@'.format(h))
 pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify())
   endif
diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index b00cc8cc2cb..16cc1095f62 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -19,8 +19,12 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+if HAVE_MEMFD_CREATE
+noinst_PROGRAMS += \
+   tools/aubinator
+endif
+
 noinst_PROGRAMS += \
-   tools/aubinator \
tools/aubinator_error_decode
 
 tools_aubinator_SOURCES = \
diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 8989d558b66..24ec1a276d9 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -36,6 +36,9 @@
 #include 
 #include 
 #include 
+#ifdef HAVE_SYS_MEMFD_H
+#include 
+#endif
 
 #include "util/list.h"
 #include "util/macros.h"
@@ -46,16 +49,6 @@
 #include "common/gen_gem.h"
 #include "intel_aub.h"
 
-#ifndef HAVE_MEMFD_CREATE
-#include 
-
-static inline int
-memfd_create(const char *name, unsigned int flags)
-{
-   return syscall(SYS_memfd_create, name, flags);
-}
-#endif
-
 /* Below is the only command missing from intel_aub.h in libdrm
  * So, reuse intel_aub.h from libdrm and #define the
  * AUB_MI_BATCH_BUFFER_END as below
diff --git a/src/intel/tools/meson.build b/src/intel/tools/meson.build
index 705a353f26a..717f03c9002 100644
--- a/src/intel/tools/meson.build
+++ b/src/intel/tools/meson.build
@@ -18,16 +18,29 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-aubinator = executable(
-  'aubinator',
-  files('aubinator.c', 'intel_aub.h'),
-  dependencies : [dep_expat, dep_zlib, dep_dl, dep_thread, dep_m],
-  include_directories : [inc_common, inc_intel],
-  link_with : [libintel_common, libintel_compiler, libintel_dev, libmesa_util],
-  c_args : [c_vis_args, no_override_init_args],
-  build_by_default : with_tools.contains('intel'),
-  install : with_tools.contains('intel'),
-)
+has_memfd_create = cc.compiles('''#include 
+  int main() {
+ return memfd_create("", 0);
+  }''',
+   name : 'memfd create') or
+   cc.compiles('''#include 
+  int main() {
+ return memfd_create("", 0);
+  }''',
+   

[Mesa-dev] [PATCH 2/3] util: u_queue: fix android build error

2018-07-05 Thread Lionel Landwerlin
mesa/src/util/u_queue.c:242:15: error: address of array 'queue->name'
  will always evaluate to 'true' [-Werror,-Wpointer-bool-conversion]

Signed-off-by: Lionel Landwerlin 
---
 src/util/u_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index 8a43320e59f..be95d9eec3f 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -239,7 +239,7 @@ util_queue_thread_func(void *input)
 
free(input);
 
-   if (queue->name) {
+   if (strlen(queue->name) > 0) {
   char name[16];
   util_snprintf(name, sizeof(name), "%s%i", queue->name, thread_index);
   u_thread_setname(name);
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] intel: compiler: silence compiler warning

2018-07-05 Thread Lionel Landwerlin
src/intel/compiler/brw_fs.cpp:5752:45: warning: comparison between
   signed and unsigned integer expressions [-Wsign-compare]

Signed-off-by: Lionel Landwerlin 
---
 src/intel/compiler/brw_fs.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index c8062de4ffb..fde06ddfc17 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5689,7 +5689,7 @@ fs_visitor::lower_simd_width()
  /* Split the copies in chunks of the execution width of either the
   * original or the lowered instruction, whichever is lower.
   */
- const unsigned n = DIV_ROUND_UP(inst->exec_size, lower_width);
+ const int n = DIV_ROUND_UP(inst->exec_size, lower_width);
  const unsigned dst_size = inst->size_written /
 inst->dst.component_size(inst->exec_size);
 
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] i965: silence android build warning

2018-07-05 Thread Lionel Landwerlin
mesa/drivers/dri/i965/genX_state_upload.c:2983:46: warning: implicit
  conversion from enumeration type 'enum gl_logicop_mode' to different
  enumeration type 'enum GEN4_3D_Logic_Op_Function' [-Wenum-conversion]

Introduced by 0c69db895f790a ("i965: Use the translated color logic op
from the context")

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 7fe12887030..844ecddb8a5 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2980,7 +2980,7 @@ set_blend_entry_bits(struct brw_context *brw, 
BLEND_ENTRY_GENXML *entry, int i,
 _mesa_enum_to_string(rb_type));
   if (GEN_GEN >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) {
  entry->LogicOpEnable = true;
- entry->LogicOpFunction = ctx->Color._LogicOp;
+ entry->LogicOpFunction = (enum GENX(3D_Logic_Op_Function)) 
ctx->Color._LogicOp;
   }
} else if (blend_enabled && !ctx->Color._AdvancedBlendMode
   && (GEN_GEN <= 5 || !integer)) {
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] util/tests/rb_tree: add unit test

2018-07-05 Thread Lionel Landwerlin
Test written by Jason :

   https://github.com/jekstrand/rb-tree

Signed-off-by: Lionel Landwerlin 
---
 src/util/Makefile.am  |   1 +
 src/util/meson.build  |   1 +
 src/util/tests/rb_tree/Makefile.am|  39 ++
 src/util/tests/rb_tree/meson.build|  29 
 src/util/tests/rb_tree/rb_tree_test.c | 184 ++
 5 files changed, 254 insertions(+)
 create mode 100644 src/util/tests/rb_tree/Makefile.am
 create mode 100644 src/util/tests/rb_tree/meson.build
 create mode 100644 src/util/tests/rb_tree/rb_tree_test.c

diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index 65794338c5b..b4182219f1d 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -22,6 +22,7 @@
 SUBDIRS = . \
xmlpool \
tests/hash_table \
+   tests/rb_tree \
tests/string_buffer
 
 if HAVE_STD_CXX11
diff --git a/src/util/meson.build b/src/util/meson.build
index 1838719d271..d45478ffe2b 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -162,6 +162,7 @@ if with_tests
   )
 
   subdir('tests/hash_table')
+  subdir('tests/rb_tree')
   subdir('tests/string_buffer')
   subdir('tests/vma')
 endif
diff --git a/src/util/tests/rb_tree/Makefile.am 
b/src/util/tests/rb_tree/Makefile.am
new file mode 100644
index 000..6c10806c495
--- /dev/null
+++ b/src/util/tests/rb_tree/Makefile.am
@@ -0,0 +1,39 @@
+# Copyright © 2018 Intel Corporation
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a
+#  copy of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#  and/or sell copies of the Software, and to permit persons to whom the
+#  Software is furnished to do so, subject to the following conditions:
+#
+#  The above copyright notice and this permission notice (including the next
+#  paragraph) shall be included in all copies or substantial portions of the
+#  Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+#  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+#  IN THE SOFTWARE.
+
+AM_CPPFLAGS = \
+   -I$(top_srcdir)/include \
+   -I$(top_srcdir)/src/util \
+   $(DEFINES)
+
+TESTS = rb_tree_random_test
+
+check_PROGRAMS = $(TESTS)
+
+vma_random_test_SOURCES = \
+   rb_tree_test.c
+
+vma_random_test_LDADD = \
+   $(top_builddir)/src/util/libmesautil.la
+
+vma_random_test_CXXFLAGS = $(CXX11_CXXFLAGS)
+
+EXTRA_DIST = meson.build
diff --git a/src/util/tests/rb_tree/meson.build 
b/src/util/tests/rb_tree/meson.build
new file mode 100644
index 000..3b4e4c7449f
--- /dev/null
+++ b/src/util/tests/rb_tree/meson.build
@@ -0,0 +1,29 @@
+# Copyright © 2018 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+test(
+  'rb_tree',
+  executable(
+'rb_tree_test',
+'rb_tree_test.c',
+include_directories : [inc_include, inc_util],
+link_with : [libmesa_util],
+  )
+)
diff --git a/src/util/tests/rb_tree/rb_tree_test.c 
b/src/util/tests/rb_tree/rb_tree_test.c
new file mode 100644
index 000..db247958918
--- /dev/null
+++ b/src/util/tests/rb_tree/rb_tree_test.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright © 2017 Jason Ekstrand
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the right

Re: [Mesa-dev] [PATCH] anv/icl: Don't set float blend optimization bit in CACHE_MODE_SS

2018-07-04 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 03/07/18 21:24, Anuj Phogat wrote:

Bump
On Fri, Jun 1, 2018 at 2:40 PM Anuj Phogat  wrote:

CACHE_MODE_SS is not listed in gfxspecs table for user mode
non-privileged registers. So, making any changes from Mesa
will do nothing. Kernel is already setting this bit in
CACHE_MODE_SS register which is saved/restored to/from
the HW context image.

Signed-off-by: Anuj Phogat 
Cc: Lionel Landwerlin 
---
  src/intel/vulkan/genX_state.c | 12 
  1 file changed, 12 deletions(-)

diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index c6e54046910..06dc2d345e2 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -121,18 +121,6 @@ genX(init_device_state)(struct anv_device *device)
 }
  #endif

-#if GEN_GEN == 10 || GEN_GEN == 11
-   uint32_t cache_mode_ss;
-   anv_pack_struct(&cache_mode_ss, GENX(CACHE_MODE_SS),
-   .FloatBlendOptimizationEnable = true,
-   .FloatBlendOptimizationEnableMask = true);
-
-   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
-  lri.RegisterOffset = GENX(CACHE_MODE_SS_num);
-  lri.DataDWord  = cache_mode_ss;
-   }
-#endif
-
 anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS), aa);

 anv_batch_emit(&batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
--
2.17.0



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] anv/cmd_buffer: never shrink the push constant buffer size

2018-07-01 Thread Lionel Landwerlin
I reread the discussion you had with Jason in order to figure out why 
this change is required.
Maybe adding a comment at the top of the function would be a good bit of 
documentation for future developers ;)


Regardless, this series is:

Reviewed-by: Lionel Landwerlin 

Thanks!

On 29/06/18 09:10, Iago Toral Quiroga wrote:

If we have to re-emit push constant data, we need to re-emit all
of it.
---
  src/intel/vulkan/anv_cmd_buffer.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 33687920a38..3e9f000f7b8 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -166,6 +166,7 @@ anv_cmd_buffer_ensure_push_constants_size(struct 
anv_cmd_buffer *cmd_buffer,
   anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
+  (*ptr)->size = size;
 } else if ((*ptr)->size < size) {
*ptr = vk_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8,
   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -173,8 +174,8 @@ anv_cmd_buffer_ensure_push_constants_size(struct 
anv_cmd_buffer *cmd_buffer,
   anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
+  (*ptr)->size = size;
 }
-   (*ptr)->size = size;
  
 return VK_SUCCESS;

  }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 05/16] util: rb-tree: A simple, invasive, red-black tree

2018-06-22 Thread Lionel Landwerlin

On 22/06/18 16:41, Chris Wilson wrote:

Quoting Lionel Landwerlin (2018-06-21 17:29:04)

From: Jason Ekstrand 

This is a simple, invasive, liberally licensed red-black tree
implementation. It's an invasive data structure similar to the
Linux kernel linked-list where the intention is that you embed a

s/linked-list/rbtree/

Might as well compare like for like.


rb_node struct in the data structure you intend to put into the
tree.

The implementation is mostly based on the one in "Introduction to
Algorithms", third edition, by Cormen, Leiserson, Rivest, and
Stein. There were a few other key design points:

  * It's an invasive data structure similar to the [Linux kernel
linked list].

  * It uses NULL for leaves instead of a sentinel. This means a few
algorithms differ a small bit from the ones in "Introduction to
Algorithms".

  * All search operations are inlined so that the compiler can
optimize away the function pointer call.
---
  src/util/Makefile.sources |   2 +
  src/util/meson.build  |   2 +
  src/util/rb_tree.c| 421 ++
  src/util/rb_tree.h| 269 
No tester? Insert/remove 1,000,000 u32 and check the post-order is
sorted and has correct coloring? (I'm stealing ideas from
kernel/lib/rbtree_test.c fwiw.)
-Chris

I've written a test generating N random insertions/deletions, will send
later.


-
Lionel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 05/16] util: rb-tree: A simple, invasive, red-black tree

2018-06-22 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 21/06/18 17:29, Lionel Landwerlin wrote:

From: Jason Ekstrand 

This is a simple, invasive, liberally licensed red-black tree
implementation. It's an invasive data structure similar to the
Linux kernel linked-list where the intention is that you embed a
rb_node struct in the data structure you intend to put into the
tree.

The implementation is mostly based on the one in "Introduction to
Algorithms", third edition, by Cormen, Leiserson, Rivest, and
Stein. There were a few other key design points:

  * It's an invasive data structure similar to the [Linux kernel
linked list].

  * It uses NULL for leaves instead of a sentinel. This means a few
algorithms differ a small bit from the ones in "Introduction to
Algorithms".

  * All search operations are inlined so that the compiler can
optimize away the function pointer call.
---
  src/util/Makefile.sources |   2 +
  src/util/meson.build  |   2 +
  src/util/rb_tree.c| 421 ++
  src/util/rb_tree.h| 269 
  4 files changed, 694 insertions(+)
  create mode 100644 src/util/rb_tree.c
  create mode 100644 src/util/rb_tree.h

diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index 534520ce763..37eb0880e35 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -30,6 +30,8 @@ MESA_UTIL_FILES := \
ralloc.h \
rand_xor.c \
rand_xor.h \
+   rb_tree.c \
+   rb_tree.h \
register_allocate.c \
register_allocate.h \
rgtc.c \
diff --git a/src/util/meson.build b/src/util/meson.build
index c777984e28d..62425bb237b 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -54,6 +54,8 @@ files_mesa_util = files(
'ralloc.h',
'rand_xor.c',
'rand_xor.h',
+  'rb_tree.c',
+  'rb_tree.h',
'register_allocate.c',
'register_allocate.h',
'rgtc.c',
diff --git a/src/util/rb_tree.c b/src/util/rb_tree.c
new file mode 100644
index 000..a86fa31a809
--- /dev/null
+++ b/src/util/rb_tree.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright © 2017 Jason Ekstrand
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "rb_tree.h"
+
+/** \file rb_tree.c
+ *
+ * An implementation of a red-black tree
+ *
+ * This file implements the guts of a red-black tree.  The implementation
+ * is mostly based on the one in "Introduction to Algorithms", third
+ * edition, by Cormen, Leiserson, Rivest, and Stein.  The primary
+ * divergence in our algorithms from those presented in CLRS is that we use
+ * NULL for the leaves instead of a sentinel.  This means we have to do a
+ * tiny bit more tracking in our implementation of delete but it makes the
+ * algorithms far more explicit than stashing stuff in the sentinel.
+ */
+
+#include 
+#include 
+#include 
+
+static bool
+rb_node_is_black(struct rb_node *n)
+{
+/* NULL nodes are leaves and therefore black */
+return (n == NULL) || (n->parent & 1);
+}
+
+static bool
+rb_node_is_red(struct rb_node *n)
+{
+return !rb_node_is_black(n);
+}
+
+static void
+rb_node_set_black(struct rb_node *n)
+{
+n->parent |= 1;
+}
+
+static void
+rb_node_set_red(struct rb_node *n)
+{
+n->parent &= ~1ull;
+}
+
+static void
+rb_node_copy_color(struct rb_node *dst, struct rb_node *src)
+{
+dst->parent = (dst->parent & ~1ull) | (src->parent & 1);
+}
+
+static void
+rb_node_set_parent(struct rb_node *n, struct rb_node *p)
+{
+n->parent = (n->parent & 1) | (uintptr_t)p;
+}
+
+static struct rb_node *
+rb_node_minimum(struct rb_node *node)
+{
+while (node->left)
+node = node->left;
+return node;
+}
+
+static struct rb_node *
+rb_node_maximum(struct rb_node *node)
+{
+while (node->right)
+node = node->right;
+return node;
+}
+
+void
+rb_tree_init(struct

[Mesa-dev] [PATCH v3 06/16] intel: aubinator: handle GGTT mappings

2018-06-21 Thread Lionel Landwerlin
We use memfd to store physical pages as they get read/written to and
the GGTT entries translating virtual address to physical pages.

Based on a commit by Scott Phillips.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/aubinator.c | 257 ++--
 1 file changed, 244 insertions(+), 13 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 3b04ba3f431..05083dbcda0 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -39,12 +39,23 @@
 
 #include "util/list.h"
 #include "util/macros.h"
+#include "util/rb_tree.h"
 
 #include "common/gen_decoder.h"
 #include "common/gen_disasm.h"
 #include "common/gen_gem.h"
 #include "intel_aub.h"
 
+#ifndef HAVE_MEMFD_CREATE
+#include <sys/syscall.h>
+
+static inline int
+memfd_create(const char *name, unsigned int flags)
+{
+   return syscall(SYS_memfd_create, name, flags);
+}
+#endif
+
 /* Below is the only command missing from intel_aub.h in libdrm
  * So, reuse intel_aub.h from libdrm and #define the
  * AUB_MI_BATCH_BUFFER_END as below
@@ -73,20 +84,39 @@ struct gen_batch_decode_ctx batch_ctx;
 struct bo_map {
struct list_head link;
struct gen_batch_decode_bo bo;
+   bool unmap_after_use;
+};
+
+struct ggtt_entry {
+   struct rb_node node;
+   uint64_t virt_addr;
+   uint64_t phys_addr;
+};
+
+struct phys_mem {
+   struct rb_node node;
+   uint64_t fd_offset;
+   uint64_t phys_addr;
+   uint8_t *data;
 };
 
 static struct list_head maps;
+static struct rb_tree ggtt = {NULL};
+static struct rb_tree mem = {NULL};
+int mem_fd = -1;
+off_t mem_fd_len = 0;
 
 FILE *outfile;
 
 struct brw_instruction;
 
 static void
-add_gtt_bo_map(struct gen_batch_decode_bo bo)
+add_gtt_bo_map(struct gen_batch_decode_bo bo, bool unmap_after_use)
 {
struct bo_map *m = calloc(1, sizeof(*m));
 
m->bo = bo;
+   m->unmap_after_use = unmap_after_use;
list_add(&m->link, &maps);
 }
 
@@ -94,21 +124,209 @@ static void
 clear_bo_maps(void)
 {
list_for_each_entry_safe(struct bo_map, i, &maps, link) {
+  if (i->unmap_after_use)
+ munmap((void *)i->bo.map, i->bo.size);
   list_del(&i->link);
   free(i);
}
 }
 
+static inline struct ggtt_entry *
+ggtt_entry_next(struct ggtt_entry *entry)
+{
+   if (!entry)
+  return NULL;
+   struct rb_node *node = rb_node_next(&entry->node);
+   if (!node)
+  return NULL;
+   return rb_node_data(struct ggtt_entry, node, node);
+}
+
+static inline int
+cmp_uint64(uint64_t a, uint64_t b)
+{
+   if (a < b)
+  return -1;
+   if (a > b)
+  return 1;
+   return 0;
+}
+
+static inline int
+cmp_ggtt_entry(const struct rb_node *node, const void *addr)
+{
+   struct ggtt_entry *entry = rb_node_data(struct ggtt_entry, node, node);
+   return cmp_uint64(entry->virt_addr, *(const uint64_t *)addr);
+}
+
+static struct ggtt_entry *
+ensure_ggtt_entry(struct rb_tree *tree, uint64_t virt_addr)
+{
+   struct rb_node *node = rb_tree_search_sloppy(&ggtt, &virt_addr,
+cmp_ggtt_entry);
+   int cmp = 0;
+   if (!node || (cmp = cmp_ggtt_entry(node, &virt_addr))) {
+  struct ggtt_entry *new_entry = calloc(1, sizeof(*new_entry));
+  new_entry->virt_addr = virt_addr;
+  rb_tree_insert_at(&ggtt, node, &new_entry->node, cmp > 0);
+  node = &new_entry->node;
+   }
+
+   return rb_node_data(struct ggtt_entry, node, node);
+}
+
+static struct ggtt_entry *
+search_ggtt_entry(uint64_t virt_addr)
+{
+   virt_addr &= ~0xfff;
+
+   struct rb_node *node = rb_tree_search(&ggtt, &virt_addr, cmp_ggtt_entry);
+
+   if (!node)
+  return NULL;
+
+   return rb_node_data(struct ggtt_entry, node, node);
+}
+
+static inline int
+cmp_phys_mem(const struct rb_node *node, const void *addr)
+{
+   struct phys_mem *mem = rb_node_data(struct phys_mem, node, node);
+   return cmp_uint64(mem->phys_addr, *(uint64_t *)addr);
+}
+
+static struct phys_mem *
+ensure_phys_mem(uint64_t phys_addr)
+{
+   struct rb_node *node = rb_tree_search_sloppy(&mem, &phys_addr, 
cmp_phys_mem);
+   int cmp = 0;
+   if (!node || (cmp = cmp_phys_mem(node, &phys_addr))) {
+  struct phys_mem *new_mem = calloc(1, sizeof(*new_mem));
+  new_mem->phys_addr = phys_addr;
+  new_mem->fd_offset = mem_fd_len;
+
+  int ftruncate_res = ftruncate(mem_fd, mem_fd_len += 4096);
+  assert(ftruncate_res == 0);
+
+  new_mem->data = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
+   mem_fd, new_mem->fd_offset);
+  assert(new_mem->data != MAP_FAILED);
+
+  rb_tree_insert_at(&mem, node, &new_mem->node, cmp > 0);
+  node = &new_mem->node;
+   }
+
+   return rb_node_data(struct phys_mem, node, node);
+}
+
+static struct phys_mem *
+search_phys_mem(uint64_t phys_addr)
+{
+   phys_addr &= ~0xfff;
+
+   struct rb_node *node = rb_tree_search(&mem, &phys_addr, cmp_phys_mem);
+
+   if (!node)
+  return NULL;
+
+   return rb_node_data(struct phys_

[Mesa-dev] [PATCH v3 13/16] intel: tools: dump-gpu: dump 48-bit addresses

2018-06-21 Thread Lionel Landwerlin
From: Scott D Phillips 

For gen8+, write out PPGTT tables in aub files so that full 48-bit
addresses can be serialized.

v2: Fix handling of `end` index in map_ppgtt

v3: Correctly mark GGTT entry as present (Rafael)

Signed-off-by: Scott D Phillips 
Signed-off-by: Lionel Landwerlin 
Cc: Jordan Justen 
---
 src/intel/tools/intel_aub.h  |   3 +-
 src/intel/tools/intel_dump_gpu.c | 315 +++
 2 files changed, 151 insertions(+), 167 deletions(-)

diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
index 9ca548edaf3..2888515048f 100644
--- a/src/intel/tools/intel_aub.h
+++ b/src/intel/tools/intel_aub.h
@@ -117,7 +117,8 @@
 /* DW3 */
 
 #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_MASK0xf000
-#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_LOCAL   (1 << 28)
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT(0 << 28)
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL(2 << 28)
 #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY  (4 << 28)
 
 /**
diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 86c133da433..a9ce109b2b6 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -51,6 +51,8 @@
 #define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
 #define MI_LRI_FORCE_POSTED   (1<<12)
 
+#define MI_BATCH_NON_SECURE_I965 (1 << 8)
+
 #define MI_BATCH_BUFFER_END (0xA << 23)
 
 #define min(a, b) ({\
@@ -59,6 +61,12 @@
  _a < _b ? _a : _b; \
   })
 
+#define max(a, b) ({\
+ __typeof(a) _a = (a);  \
+ __typeof(b) _b = (b);  \
+ _a > _b ? _a : _b; \
+  })
+
 #define HWS_PGA_RCSUNIT  0x02080
 #define HWS_PGA_VCSUNIT0   0x12080
 #define HWS_PGA_BCSUNIT  0x22080
@@ -93,8 +101,12 @@
 
 #define RING_SIZE (1 * 4096)
 #define PPHWSP_SIZE (1 * 4096)
-#define GEN10_LR_CONTEXT_RENDER_SIZE   (19 * 4096)
-#define GEN8_LR_CONTEXT_OTHER_SIZE   (2 * 4096)
+#define GEN11_LR_CONTEXT_RENDER_SIZE(14 * 4096)
+#define GEN10_LR_CONTEXT_RENDER_SIZE(19 * 4096)
+#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096)
+#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096)
+#define GEN8_LR_CONTEXT_OTHER_SIZE  (2 * 4096)
+
 
 #define STATIC_GGTT_MAP_START 0
 
@@ -110,14 +122,19 @@
 #define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
 #define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
 
-#define CONTEXT_FLAGS (0x229)   /* Normal Priority | L3-LLC Coherency |
-   Legacy Context with no 64 bit VA support | 
Valid */
+#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END))
+
+#define CONTEXT_FLAGS (0x339)   /* Normal Priority | L3-LLC Coherency |
+ * PPGTT Enabled |
+ * Legacy Context with 64 bit VA support |
+ * Valid
+ */
 
-#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 32 | RENDER_CONTEXT_ADDR  | 
CONTEXT_FLAGS)
-#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 32 | BLITTER_CONTEXT_ADDR | 
CONTEXT_FLAGS)
-#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 32 | VIDEO_CONTEXT_ADDR   | 
CONTEXT_FLAGS)
+#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR  | 
CONTEXT_FLAGS)
+#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR | 
CONTEXT_FLAGS)
+#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR   | 
CONTEXT_FLAGS)
 
-static const uint32_t render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
+static const uint32_t render_context_init[GEN9_LR_CONTEXT_RENDER_SIZE / /* 
Choose the largest */
   sizeof(uint32_t)] = {
0 /* MI_NOOP */,
MI_LOAD_REGISTER_IMM_n(14) | MI_LRI_FORCE_POSTED,
@@ -147,8 +164,8 @@ static const uint32_t 
render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
0x2280 /* PDP2_LDW */,  0,
0x227C /* PDP1_UDW */,  0,
0x2278 /* PDP1_LDW */,  0,
-   0x2274 /* PDP0_UDW */,  0,
-   0x2270 /* PDP0_LDW */,  0,
+   0x2274 /* PDP0_UDW */,  PML4_PHYS_ADDR >> 32,
+   0x2270 /* PDP0_LDW */,  PML4_PHYS_ADDR,
/* MI_NOOP */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
@@ -185,8 +202,8 @@ static const uint32_t 
blitter_context_init[GEN8_LR_CONTEXT_OTHER_SIZE /
0x22280 /* PDP2_LDW */,  0,
0x2227C /* PDP1_UDW */,  0,
0x22278 /* PDP1_LDW */,  0,
-   0x22274 /* PDP0_UDW */,  0,
-   0x22270 /* PDP0_LDW */,  0,
+   0x22274 /* PDP0_UDW */,  PML4_PHYS_ADDR >> 32,
+   0x22270 /* PDP0_LDW */,  PML4_PHYS_ADDR,
/* MI_NOOP */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
@@ -220,8 +2

[Mesa-dev] [PATCH v3 15/16] intel: intel_dump_gpu: use simulator id in captures

2018-06-21 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_aub.h  | 2 +-
 src/intel/tools/intel_dump_gpu.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
index 2888515048f..74ca26ab9bb 100644
--- a/src/intel/tools/intel_aub.h
+++ b/src/intel/tools/intel_aub.h
@@ -103,7 +103,7 @@
 /* DW2 */
 
 #define AUB_MEM_TRACE_VERSION_DEVICE_MASK  0xff00
-#define AUB_MEM_TRACE_VERSION_DEVICE_CNL   (15 << 8)
+#define AUB_MEM_TRACE_VERSION_DEVICE_SHIFT  8
 
 #define AUB_MEM_TRACE_VERSION_METHOD_MASK  0x000c
 #define AUB_MEM_TRACE_VERSION_METHOD_PHY   (1 << 18)
diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index a9ce109b2b6..6107035d5bc 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -512,7 +512,7 @@ write_execlists_header(void)
dwords = 5 + app_name_len / sizeof(uint32_t);
dword_out(CMD_MEM_TRACE_VERSION | (dwords - 1));
dword_out(AUB_MEM_TRACE_VERSION_FILE_VERSION);
-   dword_out(AUB_MEM_TRACE_VERSION_DEVICE_CNL);
+   dword_out(devinfo.simulator_id << AUB_MEM_TRACE_VERSION_DEVICE_SHIFT);
dword_out(0);  /* version */
dword_out(0);  /* version */
data_out(app_name, app_name_len);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 16/16] intel: tools: remove drm-uapi defines

2018-06-21 Thread Lionel Landwerlin
We already embed the headers, no need to redefine defines/structs.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c | 30 +-
 1 file changed, 1 insertion(+), 29 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 6107035d5bc..8a7dd52e746 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -272,27 +272,6 @@ static struct bo *bos;
 
 #define DRM_MAJOR 226
 
-#ifndef DRM_I915_GEM_USERPTR
-
-#define DRM_I915_GEM_USERPTR  0x33
-#define DRM_IOCTL_I915_GEM_USERPTR   DRM_IOWR (DRM_COMMAND_BASE + 
DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
-
-struct drm_i915_gem_userptr {
-   __u64 user_ptr;
-   __u64 user_size;
-   __u32 flags;
-#define I915_USERPTR_READ_ONLY 0x1
-#define I915_USERPTR_UNSYNCHRONIZED 0x8000
-   /**
-* Returned handle for the object.
-*
-* Object handles are nonzero.
-*/
-   __u32 handle;
-};
-
-#endif
-
 /* We set bit 0 in the map pointer for userptr BOs so we know not to
  * munmap them on DRM_IOCTL_GEM_CLOSE.
  */
@@ -300,10 +279,6 @@ struct drm_i915_gem_userptr {
 #define IS_USERPTR(p) ((uintptr_t) (p) & USERPTR_FLAG)
 #define GET_PTR(p) ( (void *) ((uintptr_t) p & ~(uintptr_t) 1) )
 
-#ifndef I915_EXEC_BATCH_FIRST
-#define I915_EXEC_BATCH_FIRST (1 << 18)
-#endif
-
 static inline bool use_execlists(void)
 {
return devinfo.gen >= 8;
@@ -1127,9 +1102,6 @@ maybe_init(void)
fail_if(bos == NULL, "intel_aubdump: out of memory\n");
 }
 
-#define LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR \
-   DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct 
drm_i915_gem_execbuffer2)
-
 __attribute__ ((visibility ("default"))) int
 ioctl(int fd, unsigned long request, ...)
 {
@@ -1185,7 +1157,7 @@ ioctl(int fd, unsigned long request, ...)
   }
 
   case DRM_IOCTL_I915_GEM_EXECBUFFER2:
-  case LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR: {
+  case DRM_IOCTL_I915_GEM_EXECBUFFER2_WR: {
  dump_execbuffer2(fd, argp);
  if (device_override)
 return 0;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 08/16] intel/batch-decoder: handle non-contiguous binding table / surface state

2018-06-21 Thread Lionel Landwerlin
From: Scott D Phillips 

Reviewed-by: Lionel Landwerlin 
---
 src/intel/common/gen_batch_decoder.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index 3852f32de36..2b6978da92d 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -236,20 +236,30 @@ dump_binding_table(struct gen_batch_decode_ctx *ctx, 
uint32_t offset, int count)
   return;
}
 
+   struct gen_batch_decode_bo bo = ctx->surface_base;
const uint32_t *pointers = ctx->surface_base.map + offset;
for (int i = 0; i < count; i++) {
   if (pointers[i] == 0)
  continue;
 
-  if (pointers[i] % 32 != 0 ||
-  (pointers[i] + strct->dw_length * 4) >= ctx->surface_base.size) {
+  if (pointers[i] % 32 != 0) {
+ fprintf(ctx->fp, "pointer %u: %08x <not valid>\n", i, pointers[i]);
+ continue;
+  }
+
+  uint64_t addr = ctx->surface_base.addr + pointers[i];
+  uint32_t size = strct->dw_length * 4;
+
+  if (addr < bo.addr || addr + size >= bo.addr + bo.size)
+ bo = ctx->get_bo(ctx->user_data, addr);
+
+  if (addr < bo.addr || addr + size >= bo.addr + bo.size) {
  fprintf(ctx->fp, "pointer %u: %08x <not valid>\n", i, pointers[i]);
  continue;
   }
 
   fprintf(ctx->fp, "pointer %u: %08x\n", i, pointers[i]);
-  ctx_print_group(ctx, strct, ctx->surface_base.addr + pointers[i],
-  ctx->surface_base.map + pointers[i]);
+  ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
}
 }
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 14/16] intel: devinfo: add simulator id

2018-06-21 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/dev/gen_device_info.c | 47 ++---
 src/intel/dev/gen_device_info.h |  5 
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c
index 8e971329892..b0ae4d18034 100644
--- a/src/intel/dev/gen_device_info.c
+++ b/src/intel/dev/gen_device_info.c
@@ -105,6 +105,7 @@ static const struct gen_device_info gen_device_info_i965 = {
   .size = 256,
},
.timestamp_frequency = 1250,
+   .simulator_id = -1,
 };
 
 static const struct gen_device_info gen_device_info_g4x = {
@@ -124,6 +125,7 @@ static const struct gen_device_info gen_device_info_g4x = {
   .size = 384,
},
.timestamp_frequency = 1250,
+   .simulator_id = -1,
 };
 
 static const struct gen_device_info gen_device_info_ilk = {
@@ -142,6 +144,7 @@ static const struct gen_device_info gen_device_info_ilk = {
   .size = 1024,
},
.timestamp_frequency = 1250,
+   .simulator_id = -1,
 };
 
 static const struct gen_device_info gen_device_info_snb_gt1 = {
@@ -170,6 +173,7 @@ static const struct gen_device_info gen_device_info_snb_gt1 
= {
   },
},
.timestamp_frequency = 1250,
+   .simulator_id = -1,
 };
 
 static const struct gen_device_info gen_device_info_snb_gt2 = {
@@ -198,6 +202,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 
= {
   },
},
.timestamp_frequency = 1250,
+   .simulator_id = -1,
 };
 
 #define GEN7_FEATURES   \
@@ -236,6 +241,7 @@ static const struct gen_device_info gen_device_info_ivb_gt1 
= {
  [MESA_SHADER_GEOMETRY]  = 192,
   },
},
+   .simulator_id = 7,
 };
 
 static const struct gen_device_info gen_device_info_ivb_gt2 = {
@@ -265,6 +271,7 @@ static const struct gen_device_info gen_device_info_ivb_gt2 
= {
  [MESA_SHADER_GEOMETRY]  = 320,
   },
},
+   .simulator_id = 7,
 };
 
 static const struct gen_device_info gen_device_info_byt = {
@@ -294,6 +301,7 @@ static const struct gen_device_info gen_device_info_byt = {
  [MESA_SHADER_GEOMETRY]  = 192,
   },
},
+   .simulator_id = 10,
 };
 
 #define HSW_FEATURES \
@@ -328,6 +336,7 @@ static const struct gen_device_info gen_device_info_hsw_gt1 
= {
  [MESA_SHADER_GEOMETRY]  = 256,
   },
},
+   .simulator_id = 9,
 };
 
 static const struct gen_device_info gen_device_info_hsw_gt2 = {
@@ -356,6 +365,7 @@ static const struct gen_device_info gen_device_info_hsw_gt2 
= {
  [MESA_SHADER_GEOMETRY]  = 640,
   },
},
+   .simulator_id = 9,
 };
 
 static const struct gen_device_info gen_device_info_hsw_gt3 = {
@@ -384,6 +394,7 @@ static const struct gen_device_info gen_device_info_hsw_gt3 
= {
  [MESA_SHADER_GEOMETRY]  = 640,
   },
},
+   .simulator_id = 9,
 };
 
 /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
@@ -429,7 +440,8 @@ static const struct gen_device_info gen_device_info_bdw_gt1 
= {
  [MESA_SHADER_TESS_EVAL] = 1536,
  [MESA_SHADER_GEOMETRY]  = 960,
   },
-   }
+   },
+   .simulator_id = 11,
 };
 
 static const struct gen_device_info gen_device_info_bdw_gt2 = {
@@ -453,7 +465,8 @@ static const struct gen_device_info gen_device_info_bdw_gt2 
= {
  [MESA_SHADER_TESS_EVAL] = 1536,
  [MESA_SHADER_GEOMETRY]  = 960,
   },
-   }
+   },
+   .simulator_id = 11,
 };
 
 static const struct gen_device_info gen_device_info_bdw_gt3 = {
@@ -477,7 +490,8 @@ static const struct gen_device_info gen_device_info_bdw_gt3 
= {
  [MESA_SHADER_TESS_EVAL] = 1536,
  [MESA_SHADER_GEOMETRY]  = 960,
   },
-   }
+   },
+   .simulator_id = 11,
 };
 
 static const struct gen_device_info gen_device_info_chv = {
@@ -507,7 +521,8 @@ static const struct gen_device_info gen_device_info_chv = {
  [MESA_SHADER_TESS_EVAL] = 384,
  [MESA_SHADER_GEOMETRY]  = 256,
   },
-   }
+   },
+   .simulator_id = 13,
 };
 
 #define GEN9_HW_INFO\
@@ -603,6 +618,7 @@ static const struct gen_device_info gen_device_info_skl_gt1 
= {
.num_eu_per_subslice = 6,
.l3_banks = 2,
.urb.size = 192,
+   .simulator_id = 12,
 };
 
 static const struct gen_device_info gen_device_info_skl_gt2 = {
@@ -612,6 +628,7 @@ static const struct gen_device_info gen_device_info_skl_gt2 
= {
.num_subslices = { 3, },
.num_eu_per_subslice = 8,
.l3_banks = 4,
+   .simulator_id = 12,
 };
 
 static const struct gen_device_info gen_device_info_skl_gt3 = {
@@ -621,6 +638,7 @@ static const struct gen_device_info gen_device_info_skl_gt3 
= {
.num_subslices = { 3, 3, },
.num_eu_per_subslice = 8,
.l3_banks = 8,
+   .simulator_id = 12,
 };
 
 static const struct gen_device_info gen_device_info_skl_gt4 = {
@@ -639,18 +657,21 @@ static const struct gen_device_info 
gen_device_info_skl_gt4 = {
 * only 1008KB of this will be used

[Mesa-dev] [PATCH v3 07/16] intel/tools/aubinator: aubinate ppgtt aubs

2018-06-21 Thread Lionel Landwerlin
From: Scott D Phillips 

v2: by Lionel
Fix memfd_create compilation issue
Fix pml4 address being stored in 32 bits instead of 64 bits
Return no buffer if first ppgtt page is not mapped

v3: Drop additional memfd_create() (Rafael)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/aubinator.c | 73 -
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 05083dbcda0..8989d558b66 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -327,6 +327,68 @@ get_ggtt_batch_bo(void *user_data, uint64_t address)
 
return bo;
 }
+
+static struct phys_mem *
+ppgtt_walk(uint64_t pml4, uint64_t address)
+{
+   uint64_t shift = 39;
+   uint64_t addr = pml4;
+   for (int level = 4; level > 0; level--) {
+  struct phys_mem *table = search_phys_mem(addr);
+  if (!table)
+ return NULL;
+  int index = (address >> shift) & 0x1ff;
+  uint64_t entry = ((uint64_t *)table->data)[index];
+  if (!(entry & 1))
+ return NULL;
+  addr = entry & ~0xfff;
+  shift -= 9;
+   }
+   return search_phys_mem(addr);
+}
+
+static bool
+ppgtt_mapped(uint64_t pml4, uint64_t address)
+{
+   return ppgtt_walk(pml4, address) != NULL;
+}
+
+static struct gen_batch_decode_bo
+get_ppgtt_batch_bo(void *user_data, uint64_t address)
+{
+   struct gen_batch_decode_bo bo = {0};
+   uint64_t pml4 = *(uint64_t *)user_data;
+
+   address &= ~0xfff;
+
+   if (!ppgtt_mapped(pml4, address))
+  return bo;
+
+   /* Map everything until the first gap since we don't know how much the
+* decoder actually needs.
+*/
+   uint64_t end = address;
+   while (ppgtt_mapped(pml4, end))
+  end += 4096;
+
+   bo.addr = address;
+   bo.size = end - address;
+   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+   assert(bo.map != MAP_FAILED);
+
+   for (uint64_t page = address; page < end; page += 4096) {
+  struct phys_mem *phys_mem = ppgtt_walk(pml4, page);
+
+  void *res = mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ,
+   MAP_SHARED | MAP_FIXED, mem_fd, phys_mem->fd_offset);
+  assert(res != MAP_FAILED);
+   }
+
+   add_gtt_bo_map(bo, true);
+
+   return bo;
+}
+
 #define GEN_ENGINE_RENDER 1
 #define GEN_ENGINE_BLITTER 2
 
@@ -368,6 +430,7 @@ handle_trace_block(uint32_t *p)
   }
 
   (void)engine; /* TODO */
+  batch_ctx.get_bo = get_ggtt_batch_bo;
   gen_print_batch(_ctx, bo.map, bo.size, 0);
 
   clear_bo_maps();
@@ -393,7 +456,7 @@ aubinator_init(uint16_t aub_pci_id, const char *app_name)
batch_flags |= GEN_BATCH_DECODE_FLOATS;
 
gen_batch_decode_ctx_init(_ctx, , outfile, batch_flags,
- xml_path, get_ggtt_batch_bo, NULL, NULL);
+ xml_path, NULL, NULL, NULL);
batch_ctx.max_vbo_decoded_lines = max_vbo_lines;
 
char *color = GREEN_HEADER, *reset_color = NORMAL;
@@ -533,12 +596,20 @@ handle_memtrace_reg_write(uint32_t *p)
uint32_t ring_buffer_head = context[5];
uint32_t ring_buffer_tail = context[7];
uint32_t ring_buffer_start = context[9];
+   uint64_t pml4 = (uint64_t)context[49] << 32 | context[51];
 
struct gen_batch_decode_bo ring_bo = get_ggtt_batch_bo(NULL,
   ring_buffer_start);
assert(ring_bo.size > 0);
void *commands = (uint8_t *)ring_bo.map + (ring_bo.addr - 
ring_buffer_start);
 
+   if (context_descriptor & 0x100 /* ppgtt */) {
+  batch_ctx.get_bo = get_ppgtt_batch_bo;
+  batch_ctx.user_data = 
+   } else {
+  batch_ctx.get_bo = get_ggtt_batch_bo;
+   }
+
(void)engine; /* TODO */
gen_print_batch(_ctx, commands, ring_buffer_tail - ring_buffer_head,
0);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 05/16] util: rb-tree: A simple, invasive, red-black tree

2018-06-21 Thread Lionel Landwerlin
From: Jason Ekstrand 

This is a simple, invasive, liberally licensed red-black tree
implementation. It's an invasive data structure similar to the
Linux kernel linked list, where the intention is that you embed an
rb_node struct in the data structure you intend to put into the
tree.

The implementation is mostly based on the one in "Introduction to
Algorithms", third edition, by Cormen, Leiserson, Rivest, and
Stein. There were a few other key design points:

 * It's an invasive data structure similar to the [Linux kernel
   linked list].

 * It uses NULL for leaves instead of a sentinel. This means a few
   algorithms differ a small bit from the ones in "Introduction to
   Algorithms".

 * All search operations are inlined so that the compiler can
   optimize away the function pointer call.
---
 src/util/Makefile.sources |   2 +
 src/util/meson.build  |   2 +
 src/util/rb_tree.c| 421 ++
 src/util/rb_tree.h| 269 
 4 files changed, 694 insertions(+)
 create mode 100644 src/util/rb_tree.c
 create mode 100644 src/util/rb_tree.h

diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index 534520ce763..37eb0880e35 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -30,6 +30,8 @@ MESA_UTIL_FILES := \
ralloc.h \
rand_xor.c \
rand_xor.h \
+   rb_tree.c \
+   rb_tree.h \
register_allocate.c \
register_allocate.h \
rgtc.c \
diff --git a/src/util/meson.build b/src/util/meson.build
index c777984e28d..62425bb237b 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -54,6 +54,8 @@ files_mesa_util = files(
   'ralloc.h',
   'rand_xor.c',
   'rand_xor.h',
+  'rb_tree.c',
+  'rb_tree.h',
   'register_allocate.c',
   'register_allocate.h',
   'rgtc.c',
diff --git a/src/util/rb_tree.c b/src/util/rb_tree.c
new file mode 100644
index 000..a86fa31a809
--- /dev/null
+++ b/src/util/rb_tree.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright © 2017 Jason Ekstrand
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "rb_tree.h"
+
+/** \file rb_tree.c
+ *
+ * An implementation of a red-black tree
+ *
+ * This file implements the guts of a red-black tree.  The implementation
+ * is mostly based on the one in "Introduction to Algorithms", third
+ * edition, by Cormen, Leiserson, Rivest, and Stein.  The primary
+ * divergence in our algorithms from those presented in CLRS is that we use
+ * NULL for the leaves instead of a sentinel.  This means we have to do a
+ * tiny bit more tracking in our implementation of delete but it makes the
+ * algorithms far more explicit than stashing stuff in the sentinel.
+ */
+
+#include 
+#include 
+#include 
+
+static bool
+rb_node_is_black(struct rb_node *n)
+{
+/* NULL nodes are leaves and therefore black */
+return (n == NULL) || (n->parent & 1);
+}
+
+static bool
+rb_node_is_red(struct rb_node *n)
+{
+return !rb_node_is_black(n);
+}
+
+static void
+rb_node_set_black(struct rb_node *n)
+{
+n->parent |= 1;
+}
+
+static void
+rb_node_set_red(struct rb_node *n)
+{
+n->parent &= ~1ull;
+}
+
+static void
+rb_node_copy_color(struct rb_node *dst, struct rb_node *src)
+{
+dst->parent = (dst->parent & ~1ull) | (src->parent & 1);
+}
+
+static void
+rb_node_set_parent(struct rb_node *n, struct rb_node *p)
+{
+n->parent = (n->parent & 1) | (uintptr_t)p;
+}
+
+static struct rb_node *
+rb_node_minimum(struct rb_node *node)
+{
+while (node->left)
+node = node->left;
+return node;
+}
+
+static struct rb_node *
+rb_node_maximum(struct rb_node *node)
+{
+while (node->right)
+node = node->right;
+return node;
+}
+
+void
+rb_tree_init(struct rb_tree *T)
+{
+T->root = NULL;
+}
+
+/**
+ * Replace the subtree of T rooted at u with the subtree rooted at v
+ *
+ * This is called RB-transplant in CLRS.
+ *
+ * The node to be replaced is assumed to be a 

[Mesa-dev] [PATCH v3 00/16] intel: aubinator: handle ppgtt & softpin

2018-06-21 Thread Lionel Landwerlin
Hi all,

Some fixes spotted by Rafael.

Thanks,

Jason Ekstrand (1):
  util: rb-tree: A simple, invasive, red-black tree

Lionel Landwerlin (12):
  intel: aubinator: remove unused variables
  intel: aubinator: remove standard input processing option
  intel: aubinator: rework register writes handling
  intel: aubinator: drop the 1Tb GTT mapping
  intel: aubinator: handle GGTT mappings
  intel: batch-decoder: don't ask for constant BO until decoding
  intel: batch-decoder: add missing return line
  intel: tools: update intel_aub.h
  intel: tools: import intel_aubdump
  intel: devinfo: add simulator id
  intel: intel_dump_gpu: use simulator id in captures
  intel: tools: remove drm-uapi defines

Scott D Phillips (3):
  intel/tools/aubinator: aubinate ppgtt aubs
  intel/batch-decoder: handle non-contiguous binding table / surface
state
  intel: tools: dump-gpu: dump 48-bit addresses

 src/intel/Makefile.am|2 +
 src/intel/common/gen_batch_decoder.c |   37 +-
 src/intel/dev/gen_device_info.c  |   47 +-
 src/intel/dev/gen_device_info.h  |5 +
 src/intel/tools/aubinator.c  |  625 +
 src/intel/tools/intel_aub.h  |   27 +
 src/intel/tools/intel_dump_gpu.c | 1268 ++
 src/intel/tools/intel_dump_gpu.in|  107 +++
 src/intel/tools/meson.build  |   18 +
 src/util/Makefile.sources|2 +
 src/util/meson.build |2 +
 src/util/rb_tree.c   |  421 +
 src/util/rb_tree.h   |  269 ++
 13 files changed, 2635 insertions(+), 195 deletions(-)
 create mode 100644 src/intel/tools/intel_dump_gpu.c
 create mode 100755 src/intel/tools/intel_dump_gpu.in
 create mode 100644 src/util/rb_tree.c
 create mode 100644 src/util/rb_tree.h

--
2.17.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 12/16] intel: tools: import intel_aubdump

2018-06-21 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
Acked-by: Rafael Antognolli 
---
 src/intel/Makefile.am |2 +
 src/intel/tools/intel_dump_gpu.c  | 1313 +
 src/intel/tools/intel_dump_gpu.in |  107 +++
 src/intel/tools/meson.build   |   18 +
 4 files changed, 1440 insertions(+)
 create mode 100644 src/intel/tools/intel_dump_gpu.c
 create mode 100755 src/intel/tools/intel_dump_gpu.in

diff --git a/src/intel/Makefile.am b/src/intel/Makefile.am
index 3e098a7ac9b..8448640983f 100644
--- a/src/intel/Makefile.am
+++ b/src/intel/Makefile.am
@@ -71,6 +71,8 @@ EXTRA_DIST = \
isl/meson.build \
tools/intel_sanitize_gpu.c \
tools/intel_sanitize_gpu.in \
+   tools/intel_dump_gpu.c \
+   tools/intel_dump_gpu.in \
tools/meson.build \
vulkan/meson.build \
meson.build
diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
new file mode 100644
index 000..86c133da433
--- /dev/null
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -0,0 +1,1313 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "intel_aub.h"
+
+#include "dev/gen_device_info.h"
+#include "util/macros.h"
+
+#ifndef ALIGN
+#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
+#endif
+
+#define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
+#define MI_LRI_FORCE_POSTED   (1<<12)
+
+#define MI_BATCH_BUFFER_END (0xA << 23)
+
+#define min(a, b) ({\
+ __typeof(a) _a = (a);  \
+ __typeof(b) _b = (b);  \
+ _a < _b ? _a : _b; \
+  })
+
+#define HWS_PGA_RCSUNIT  0x02080
+#define HWS_PGA_VCSUNIT0   0x12080
+#define HWS_PGA_BCSUNIT  0x22080
+
+#define GFX_MODE_RCSUNIT   0x0229c
+#define GFX_MODE_VCSUNIT0   0x1229c
+#define GFX_MODE_BCSUNIT   0x2229c
+
+#define EXECLIST_SUBMITPORT_RCSUNIT   0x02230
+#define EXECLIST_SUBMITPORT_VCSUNIT0   0x12230
+#define EXECLIST_SUBMITPORT_BCSUNIT   0x22230
+
+#define EXECLIST_STATUS_RCSUNIT  0x02234
+#define EXECLIST_STATUS_VCSUNIT0   0x12234
+#define EXECLIST_STATUS_BCSUNIT  0x22234
+
+#define EXECLIST_SQ_CONTENTS0_RCSUNIT   0x02510
+#define EXECLIST_SQ_CONTENTS0_VCSUNIT0   0x12510
+#define EXECLIST_SQ_CONTENTS0_BCSUNIT   0x22510
+
+#define EXECLIST_CONTROL_RCSUNIT   0x02550
+#define EXECLIST_CONTROL_VCSUNIT0   0x12550
+#define EXECLIST_CONTROL_BCSUNIT   0x22550
+
+#define MEMORY_MAP_SIZE (64 /* MiB */ * 1024 * 1024)
+
+#define PTE_SIZE 4
+#define GEN8_PTE_SIZE 8
+
+#define NUM_PT_ENTRIES (ALIGN(MEMORY_MAP_SIZE, 4096) / 4096)
+#define PT_SIZE ALIGN(NUM_PT_ENTRIES * GEN8_PTE_SIZE, 4096)
+
+#define RING_SIZE (1 * 4096)
+#define PPHWSP_SIZE (1 * 4096)
+#define GEN10_LR_CONTEXT_RENDER_SIZE   (19 * 4096)
+#define GEN8_LR_CONTEXT_OTHER_SIZE   (2 * 4096)
+
+#define STATIC_GGTT_MAP_START 0
+
+#define RENDER_RING_ADDR STATIC_GGTT_MAP_START
+#define RENDER_CONTEXT_ADDR (RENDER_RING_ADDR + RING_SIZE)
+
+#define BLITTER_RING_ADDR (RENDER_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN10_LR_CONTEXT_RENDER_SIZE)
+#define BLITTER_CONTEXT_ADDR (BLITTER_RING_ADDR + RING_SIZE)
+
+#define VIDEO_RING_ADDR (BLITTER_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
+#define VIDEO_CONTEXT_ADDR (VIDEO_RING_ADDR + RING_SIZE)
+
+#define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
+#define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
+
+#define CONTEXT_FLAGS (0x229)   /* Normal Priority | L3-LLC Coherency |
+   Legacy Context with no 64 bi

[Mesa-dev] [PATCH v3 11/16] intel: tools: update intel_aub.h

2018-06-21 Thread Lionel Landwerlin
Scott added new stuff in IGT.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/intel_aub.h | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
index 5f0aba8e68e..9ca548edaf3 100644
--- a/src/intel/tools/intel_aub.h
+++ b/src/intel/tools/intel_aub.h
@@ -49,6 +49,12 @@
 #define CMD_AUB(7 << 29)
 
 #define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16))
+
+#define CMD_MEM_TRACE_REGISTER_POLL(CMD_AUB | (0x2e << 23) | (0x02 << 16))
+#define CMD_MEM_TRACE_REGISTER_WRITE   (CMD_AUB | (0x2e << 23) | (0x03 << 16))
+#define CMD_MEM_TRACE_MEMORY_WRITE (CMD_AUB | (0x2e << 23) | (0x06 << 16))
+#define CMD_MEM_TRACE_VERSION  (CMD_AUB | (0x2e << 23) | (0x0e << 16))
+
 /* DW1 */
 # define AUB_HEADER_MAJOR_SHIFT24
 # define AUB_HEADER_MINOR_SHIFT16
@@ -92,8 +98,28 @@
 #define AUB_TRACE_MEMTYPE_PCI  (3 << 16)
 #define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16)
 
+#define AUB_MEM_TRACE_VERSION_FILE_VERSION 1
+
 /* DW2 */
 
+#define AUB_MEM_TRACE_VERSION_DEVICE_MASK  0xff00
+#define AUB_MEM_TRACE_VERSION_DEVICE_CNL   (15 << 8)
+
+#define AUB_MEM_TRACE_VERSION_METHOD_MASK  0x000c
+#define AUB_MEM_TRACE_VERSION_METHOD_PHY   (1 << 18)
+
+#define AUB_MEM_TRACE_REGISTER_SIZE_MASK   0x000f
+#define AUB_MEM_TRACE_REGISTER_SIZE_DWORD  (2 << 16)
+
+#define AUB_MEM_TRACE_REGISTER_SPACE_MASK  0xf000
+#define AUB_MEM_TRACE_REGISTER_SPACE_MMIO  (0 << 28)
+
+/* DW3 */
+
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_MASK0xf000
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_LOCAL   (1 << 28)
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY  (4 << 28)
+
 /**
  * aub_state_struct_type enum values are encoded with the top 16 bits
  * representing the type to be delivered to the .aub file, and the bottom 16
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 09/16] intel: batch-decoder: don't ask for constant BO until decoding

2018-06-21 Thread Lionel Landwerlin
With PPGTT mappings, our aubinator implementation can be quite slow if
we request a buffer that doesn't exist. Instead of doing a PPGTT walk
for invalid addresses (0 lengths), wait until we're sure we want to
decode the data.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/common/gen_batch_decoder.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index 2b6978da92d..81d8298c28b 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -562,9 +562,8 @@ decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, 
const uint32_t *p)
struct gen_group *body =
   gen_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
 
-   uint32_t read_length[4];
-   struct gen_batch_decode_bo buffer[4];
-   memset(buffer, 0, sizeof(buffer));
+   uint32_t read_length[4] = {0};
+   uint64_t read_addr[4];
 
struct gen_field_iterator outer;
gen_field_iterator_init(, inst, p, 0, false);
@@ -581,18 +580,24 @@ decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, 
const uint32_t *p)
  if (sscanf(iter.name, "Read Length[%d]", ) == 1) {
 read_length[idx] = iter.raw_value;
  } else if (sscanf(iter.name, "Buffer[%d]", ) == 1) {
-buffer[idx] = ctx_get_bo(ctx, iter.raw_value);
+read_addr[idx] = iter.raw_value;
  }
   }
 
   for (int i = 0; i < 4; i++) {
- if (read_length[i] == 0 || buffer[i].map == NULL)
+ if (read_length[i] == 0)
 continue;
 
+ struct gen_batch_decode_bo buffer = ctx_get_bo(ctx, read_addr[i]);
+ if (!buffer.map) {
+fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
+continue;
+ }
+
  unsigned size = read_length[i] * 32;
  fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
 
- ctx_print_buffer(ctx, buffer[i], size, 0, -1);
+ ctx_print_buffer(ctx, buffer, size, 0, -1);
   }
}
 }
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 10/16] intel: batch-decoder: add missing return line

2018-06-21 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/common/gen_batch_decoder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index 81d8298c28b..fc0ff95a476 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -854,7 +854,7 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
  }
 
  if (next_batch.map == NULL) {
-fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable",
+fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
 next_batch.addr);
  }
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 04/16] intel: aubinator: drop the 1Tb GTT mapping

2018-06-21 Thread Lionel Landwerlin
Now that we're softpinning the address of our BOs in anv & i965, the
addresses selected start at the top of the addressing space. This is a
problem for the current implementation of aubinator which uses only a
40bit mmapped address space.

This change keeps track of all the memory writes from the aub file and
fetches them on request by the batch decoder. As a result we can get rid
of the 1<<40 mmapped address space and rely only on the mmapped aub file
\o/

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/aubinator.c | 130 +---
 1 file changed, 75 insertions(+), 55 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index d0b26219eb4..3b04ba3f431 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -37,10 +37,12 @@
 #include 
 #include 
 
+#include "util/list.h"
 #include "util/macros.h"
 
 #include "common/gen_decoder.h"
 #include "common/gen_disasm.h"
+#include "common/gen_gem.h"
 #include "intel_aub.h"
 
 /* Below is the only command missing from intel_aub.h in libdrm
@@ -68,13 +70,45 @@ char *input_file = NULL, *xml_path = NULL;
 struct gen_device_info devinfo;
 struct gen_batch_decode_ctx batch_ctx;
 
-uint64_t gtt_size, gtt_end;
-void *gtt;
+struct bo_map {
+   struct list_head link;
+   struct gen_batch_decode_bo bo;
+};
+
+static struct list_head maps;
 
 FILE *outfile;
 
 struct brw_instruction;
 
+static void
+add_gtt_bo_map(struct gen_batch_decode_bo bo)
+{
+   struct bo_map *m = calloc(1, sizeof(*m));
+
+   m->bo = bo;
+   list_add(>link, );
+}
+
+static void
+clear_bo_maps(void)
+{
+   list_for_each_entry_safe(struct bo_map, i, , link) {
+  list_del(>link);
+  free(i);
+   }
+}
+
+static struct gen_batch_decode_bo
+get_gen_batch_bo(void *user_data, uint64_t address)
+{
+   list_for_each_entry(struct bo_map, i, , link)
+  if (i->bo.addr <= address && i->bo.addr + i->bo.size > address)
+ return i->bo;
+
+   return (struct gen_batch_decode_bo) { .map = NULL };
+}
+
 #define GEN_ENGINE_RENDER 1
 #define GEN_ENGINE_BLITTER 2
 
@@ -84,26 +118,23 @@ handle_trace_block(uint32_t *p)
int operation = p[1] & AUB_TRACE_OPERATION_MASK;
int type = p[1] & AUB_TRACE_TYPE_MASK;
int address_space = p[1] & AUB_TRACE_ADDRESS_SPACE_MASK;
-   uint64_t offset = p[3];
-   uint32_t size = p[4];
int header_length = p[0] & 0x;
-   uint32_t *data = p + header_length + 2;
int engine = GEN_ENGINE_RENDER;
-
-   if (devinfo.gen >= 8)
-  offset += (uint64_t) p[5] << 32;
+   struct gen_batch_decode_bo bo = {
+  .map = p + header_length + 2,
+  /* Addresses written by aubdump here are in canonical form but the batch
+   * decoder always gives us addresses with the top 16bits zeroed, so do
+   * the same here.
+   */
+  .addr = gen_48b_address((devinfo.gen >= 8 ? ((uint64_t) p[5] << 32) : 0) 
|
+  ((uint64_t) p[3])),
+  .size = p[4],
+   };
 
switch (operation) {
case AUB_TRACE_OP_DATA_WRITE:
-  if (address_space != AUB_TRACE_MEMTYPE_GTT)
- break;
-  if (gtt_size < offset + size) {
- fprintf(stderr, "overflow gtt space: %s\n", strerror(errno));
- exit(EXIT_FAILURE);
-  }
-  memcpy((char *) gtt + offset, data, size);
-  if (gtt_end < offset + size)
- gtt_end = offset + size;
+  if (address_space == AUB_TRACE_MEMTYPE_GTT)
+ add_gtt_bo_map(bo);
   break;
case AUB_TRACE_OP_COMMAND_WRITE:
   switch (type) {
@@ -119,27 +150,13 @@ handle_trace_block(uint32_t *p)
   }
 
   (void)engine; /* TODO */
-  gen_print_batch(_ctx, data, size, 0);
+  gen_print_batch(_ctx, bo.map, bo.size, 0);
 
-  gtt_end = 0;
+  clear_bo_maps();
   break;
}
 }
 
-static struct gen_batch_decode_bo
-get_gen_batch_bo(void *user_data, uint64_t address)
-{
-   if (address > gtt_end)
-  return (struct gen_batch_decode_bo) { .map = NULL };
-
-   /* We really only have one giant address range */
-   return (struct gen_batch_decode_bo) {
-  .addr = 0,
-  .map = gtt,
-  .size = gtt_size
-   };
-}
-
 static void
 aubinator_init(uint16_t aub_pci_id, const char *app_name)
 {
@@ -289,34 +306,44 @@ handle_memtrace_reg_write(uint32_t *p)
}
 
const uint32_t pphwsp_size = 4096;
-   uint32_t *context = (uint32_t*)(gtt + (context_descriptor & 0xf000) + 
pphwsp_size);
+   uint32_t pphwsp_addr = context_descriptor & 0xf000;
+   struct gen_batch_decode_bo pphwsp_bo = get_gen_batch_bo(NULL, pphwsp_addr);
+   uint32_t *context = (uint32_t *)((uint8_t *)pphwsp_bo.map +
+(pphwsp_bo.addr - pphwsp_addr) +
+pphwsp_size);
+
uint32_t ring_buffer_head = context[5];
uint32_t 

[Mesa-dev] [PATCH v3 02/16] intel: aubinator: remove standard input processing option

2018-06-21 Thread Lionel Landwerlin
In a follow-up commit in this series, we stop copying the data from
the mmap'ed file into our big gtt mmap, and start referencing data in
it directly. So reallocating the read buffer and adding more data from
stdin wouldn't work. For that reason, let's stop supporting stdin
processing.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/aubinator.c | 102 +---
 1 file changed, 12 insertions(+), 90 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 0e2fd5fb34a..92db1ed0b31 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -334,17 +334,6 @@ aub_file_open(const char *filename)
return file;
 }
 
-static struct aub_file *
-aub_file_stdin(void)
-{
-   struct aub_file *file;
-
-   file = calloc(1, sizeof *file);
-   file->stream = stdin;
-
-   return file;
-}
-
 #define TYPE(dw)   (((dw) >> 29) & 7)
 #define OPCODE(dw) (((dw) >> 23) & 0x3f)
 #define SUBOPCODE(dw)  (((dw) >> 16) & 0x7f)
@@ -382,8 +371,7 @@ aub_file_decode_batch(struct aub_file *file)
uint32_t *p, h, *new_cursor;
int header_length, bias;
 
-   if (file->end - file->cursor < 1)
-  return AUB_ITEM_DECODE_NEED_MORE_DATA;
+   assert(file->cursor < file->end);
 
p = file->cursor;
h = *p;
@@ -405,13 +393,11 @@ aub_file_decode_batch(struct aub_file *file)
 
new_cursor = p + header_length + bias;
if ((h & 0x) == MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_BLOCK)) 
{
-  if (file->end - file->cursor < 4)
- return AUB_ITEM_DECODE_NEED_MORE_DATA;
+  assert(file->end - file->cursor >= 4);
   new_cursor += p[4] / 4;
}
 
-   if (new_cursor > file->end)
-  return AUB_ITEM_DECODE_NEED_MORE_DATA;
+   assert(new_cursor <= file->end);
 
switch (h & 0x) {
case MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_HEADER):
@@ -452,48 +438,6 @@ aub_file_more_stuff(struct aub_file *file)
return file->cursor < file->end || (file->stream && !feof(file->stream));
 }
 
-#define AUB_READ_BUFFER_SIZE (4096)
-#define MAX(a, b) ((a) < (b) ? (b) : (a))
-
-static void
-aub_file_data_grow(struct aub_file *file)
-{
-   size_t old_size = (file->mem_end - file->map) * 4;
-   size_t new_size = MAX(old_size * 2, AUB_READ_BUFFER_SIZE);
-   uint32_t *new_start = realloc(file->map, new_size);
-
-   file->cursor = new_start + (file->cursor - file->map);
-   file->end = new_start + (file->end - file->map);
-   file->map = new_start;
-   file->mem_end = file->map + (new_size / 4);
-}
-
-static bool
-aub_file_data_load(struct aub_file *file)
-{
-   size_t r;
-
-   if (file->stream == NULL)
-  return false;
-
-   /* First remove any consumed data */
-   if (file->cursor > file->map) {
-  memmove(file->map, file->cursor,
-  (file->end - file->cursor) * 4);
-  file->end -= file->cursor - file->map;
-  file->cursor = file->map;
-   }
-
-   /* Then load some new data in */
-   if ((file->mem_end - file->end) < (AUB_READ_BUFFER_SIZE / 4))
-  aub_file_data_grow(file);
-
-   r = fread(file->end, 1, (file->mem_end - file->end) * 4, file->stream);
-   file->end += r / 4;
-
-   return r != 0;
-}
-
 static void
 setup_pager(void)
 {
@@ -525,9 +469,8 @@ static void
 print_help(const char *progname, FILE *file)
 {
fprintf(file,
-   "Usage: %s [OPTION]... [FILE]\n"
-   "Decode aub file contents from either FILE or the standard 
input.\n\n"
-   "A valid --gen option must be provided.\n\n"
+   "Usage: %s [OPTION]... FILE\n"
+   "Decode aub file contents from FILE.\n\n"
"  --help display this help and exit\n"
"  --gen=platform decode for given platform (3 letter 
platform name)\n"
"  --headers  decode only command headers\n"
@@ -596,14 +539,14 @@ int main(int argc, char *argv[])
   }
}
 
-   if (help || argc == 1) {
+   if (optind < argc)
+  input_file = argv[optind];
+
+   if (help || !input_file) {
   print_help(argv[0], stderr);
   exit(0);
}
 
-   if (optind < argc)
-  input_file = argv[optind];
-
/* Do this before we redirect stdout to pager. */
if (option_color == COLOR_AUTO)
   option_color = isatty(1) ? COLOR_ALWAYS : COLOR_NEVER;
@@ -611,11 +554,6 @@ int main(int argc, char *argv[])
if (isatty(1) && pager)
   setup_pager();
 
-   if (input_file == NULL)
-  file = aub_file_stdin();
-   else
-  file = aub_file_open(input_file);
-
/* mmap a terabyte for our gtt space. */
gtt_size = 1ull << 40;
gtt = mmap(NULL, gtt_size, PROT_READ | PROT_WRITE,
@@ -625,26 +563,10 @

[Mesa-dev] [PATCH v3 03/16] intel: aubinator: rework register writes handling

2018-06-21 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/aubinator.c | 82 -
 1 file changed, 54 insertions(+), 28 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 92db1ed0b31..d0b26219eb4 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -224,46 +224,72 @@ handle_memtrace_version(uint32_t *p)
 static void
 handle_memtrace_reg_write(uint32_t *p)
 {
+   static struct execlist_regs {
+  uint32_t render_elsp[4];
+  int render_elsp_index;
+  uint32_t blitter_elsp[4];
+  int blitter_elsp_index;
+   } state = {};
+
uint32_t offset = p[1];
uint32_t value = p[5];
+
int engine;
-   static int render_elsp_writes = 0;
-   static int blitter_elsp_writes = 0;
-   static int render_elsq0 = 0;
-   static int blitter_elsq0 = 0;
-   uint8_t *pphwsp;
-
-   if (offset == 0x2230) {
-  render_elsp_writes++;
+   uint64_t context_descriptor;
+
+   switch (offset) {
+   case 0x2230: /* render elsp */
+  state.render_elsp[state.render_elsp_index++] = value;
+  if (state.render_elsp_index < 4)
+ return;
+
+  state.render_elsp_index = 0;
   engine = GEN_ENGINE_RENDER;
-   } else if (offset == 0x22230) {
-  blitter_elsp_writes++;
+  context_descriptor = (uint64_t)state.render_elsp[2] << 32 |
+ state.render_elsp[3];
+  break;
+   case 0x22230: /* blitter elsp */
+  state.blitter_elsp[state.blitter_elsp_index++] = value;
+  if (state.blitter_elsp_index < 4)
+ return;
+
+  state.blitter_elsp_index = 0;
   engine = GEN_ENGINE_BLITTER;
-   } else if (offset == 0x2510) {
-  render_elsq0 = value;
-   } else if (offset == 0x22510) {
-  blitter_elsq0 = value;
-   } else if (offset == 0x2550 || offset == 0x22550) {
-  /* nothing */;
-   } else {
+  context_descriptor = (uint64_t)state.blitter_elsp[2] << 32 |
+ state.blitter_elsp[3];
+  break;
+   case 0x2510: /* render elsq0 lo */
+  state.render_elsp[3] = value;
   return;
-   }
-
-   if (render_elsp_writes > 3 || blitter_elsp_writes > 3) {
-  render_elsp_writes = blitter_elsp_writes = 0;
-  pphwsp = (uint8_t*)gtt + (value & 0xfffff000);
-   } else if (offset == 0x2550) {
+  break;
+   case 0x2514: /* render elsq0 hi */
+  state.render_elsp[2] = value;
+  return;
+  break;
+   case 0x22510: /* blitter elsq0 lo */
+  state.blitter_elsp[3] = value;
+  return;
+  break;
+   case 0x22514: /* blitter elsq0 hi */
+  state.blitter_elsp[2] = value;
+  return;
+  break;
+   case 0x2550: /* render elsc */
   engine = GEN_ENGINE_RENDER;
-  pphwsp = (uint8_t*)gtt + (render_elsq0 & 0xfffff000);
-   } else if (offset == 0x22550) {
+  context_descriptor = (uint64_t)state.render_elsp[2] << 32 |
+ state.render_elsp[3];
+  break;
+   case 0x22550: /* blitter elsc */
   engine = GEN_ENGINE_BLITTER;
-  pphwsp = (uint8_t*)gtt + (blitter_elsq0 & 0xfffff000);
-   } else {
+  context_descriptor = (uint64_t)state.blitter_elsp[2] << 32 |
+ state.blitter_elsp[3];
+  break;
+   default:
   return;
}
 
const uint32_t pphwsp_size = 4096;
-   uint32_t *context = (uint32_t*)(pphwsp + pphwsp_size);
+   uint32_t *context = (uint32_t*)(gtt + (context_descriptor & 0xfffff000) + 
pphwsp_size);
uint32_t ring_buffer_head = context[5];
uint32_t ring_buffer_tail = context[7];
uint32_t ring_buffer_start = context[9];
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 01/16] intel: aubinator: remove unused variables

2018-06-21 Thread Lionel Landwerlin
These memory offsets are stored in the gen_batch_decode_ctx.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/aubinator.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 77676e9c670..0e2fd5fb34a 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -70,11 +70,6 @@ struct gen_batch_decode_ctx batch_ctx;
 
 uint64_t gtt_size, gtt_end;
 void *gtt;
-uint64_t general_state_base;
-uint64_t surface_state_base;
-uint64_t dynamic_state_base;
-uint64_t instruction_base;
-uint64_t instruction_bound;
 
 FILE *outfile;
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 06/16] intel: aubinator: handle GGTT mappings

2018-06-20 Thread Lionel Landwerlin

On 20/06/18 19:19, Rafael Antognolli wrote:

On Tue, Jun 19, 2018 at 02:45:21PM +0100, Lionel Landwerlin wrote:

We use memfd to store physical pages as they get read/written to and
the GGTT entries translating virtual address to physical pages.

Based on a commit by Scott Phillips.

Signed-off-by: Lionel Landwerlin 
---
  src/intel/tools/aubinator.c | 256 ++--
  1 file changed, 243 insertions(+), 13 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index f70038376be..962546d360c 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -39,12 +39,23 @@
  
  #include "util/list.h"

  #include "util/macros.h"
+#include "util/rb_tree.h"
  
  #include "common/gen_decoder.h"

  #include "common/gen_disasm.h"
  #include "common/gen_gem.h"
  #include "intel_aub.h"
  
+#ifndef HAVE_MEMFD_CREATE

+#include <sys/syscall.h>
+
+static inline int
+memfd_create(const char *name, unsigned int flags)
+{
+   return syscall(SYS_memfd_create, name, flags);
+}
+#endif
+
  /* Below is the only command missing from intel_aub.h in libdrm
   * So, reuse intel_aub.h from libdrm and #define the
   * AUB_MI_BATCH_BUFFER_END as below
@@ -73,9 +84,27 @@ struct gen_batch_decode_ctx batch_ctx;
  struct bo_map {
 struct list_head link;
 struct gen_batch_decode_bo bo;
+   bool unmap_after_use;
+};
+
+struct ggtt_entry {
+   struct rb_node node;
+   uint64_t virt_addr;
+   uint64_t phys_addr;
+};
+
+struct phys_mem {
+   struct rb_node node;
+   uint64_t fd_offset;
+   uint64_t phys_addr;
+   uint8_t *data;
  };
  
  static struct list_head maps;

+static struct rb_tree ggtt = {NULL};
+static struct rb_tree mem = {NULL};
+int mem_fd = -1;
+off_t mem_fd_len = 0;
  
  FILE *outfile;
  
@@ -92,11 +121,12 @@ field(uint32_t value, int start, int end)

  struct brw_instruction;
  
  static void

-add_gtt_bo_map(struct gen_batch_decode_bo bo)
+add_gtt_bo_map(struct gen_batch_decode_bo bo, bool unmap_after_use)
  {
 struct bo_map *m = calloc(1, sizeof(*m));
  
 m->bo = bo;

+   m->unmap_after_use = unmap_after_use;
 list_add(&m->link, &maps);
  }
  
@@ -104,21 +134,208 @@ static void

  clear_bo_maps(void)
  {
 list_for_each_entry_safe(struct bo_map, i, &maps, link) {
+  if (i->unmap_after_use)
+ munmap((void *)i->bo.map, i->bo.size);
list_del(&i->link);
free(i);
 }
  }
  
+static inline struct ggtt_entry *

+ggtt_entry_next(struct ggtt_entry *entry)
+{
+   if (!entry)
+  return NULL;
+   struct rb_node *node = rb_node_next(&entry->node);
+   if (!node)
+  return NULL;
+   return rb_node_data(struct ggtt_entry, node, node);
+}
+
+static inline int
+cmp_uint64(uint64_t a, uint64_t b)
+{
+   if (a < b)
+  return -1;
+   if (a > b)
+  return 1;
+   return 0;
+}
+
+static inline int
+cmp_ggtt_entry(const struct rb_node *node, const void *addr)
+{
+   struct ggtt_entry *entry = rb_node_data(struct ggtt_entry, node, node);
+   return cmp_uint64(entry->virt_addr, *(uint64_t *)addr);
+}
+
+static struct ggtt_entry *
+ensure_ggtt_entry(struct rb_tree *tree, uint64_t virt_addr)
+{
+   struct rb_node *node = rb_tree_search_sloppy(&ggtt, &virt_addr,
+cmp_ggtt_entry);
+   int cmp = 0;
+   if (!node || (cmp = cmp_ggtt_entry(node, &virt_addr))) {
+  struct ggtt_entry *new_entry = calloc(1, sizeof(*new_entry));
+  new_entry->virt_addr = virt_addr;
+  rb_tree_insert_at(&ggtt, node, &new_entry->node, cmp > 0);
+  node = &new_entry->node;
+   }
+
+   return rb_node_data(struct ggtt_entry, node, node);
+}
+
+static struct ggtt_entry *
+search_ggtt_entry(uint64_t virt_addr)
+{
+   virt_addr &= ~0xfff;
+
+   struct rb_node *node = rb_tree_search(&ggtt, &virt_addr, cmp_ggtt_entry);
+
+   if (!node)
+  return NULL;
+
+   return rb_node_data(struct ggtt_entry, node, node);
+}
+
+static inline int
+cmp_phys_mem(const struct rb_node *node, const void *addr)
+{
+   struct phys_mem *mem = rb_node_data(struct phys_mem, node, node);
+   return cmp_uint64(mem->phys_addr, *(uint64_t *)addr);
+}
+
+static struct phys_mem *
+ensure_phys_mem(uint64_t phys_addr)
+{
+   struct rb_node *node = rb_tree_search_sloppy(&mem, &phys_addr, 
cmp_phys_mem);
+   int cmp = 0;
+   if (!node || (cmp = cmp_phys_mem(node, &phys_addr))) {
+  struct phys_mem *new_mem = calloc(1, sizeof(*new_mem));
+  new_mem->phys_addr = phys_addr;
+  new_mem->fd_offset = mem_fd_len;
+
+  int ftruncate_res = ftruncate(mem_fd, mem_fd_len += 4096);
+  assert(ftruncate_res == 0);
+
+  new_mem->data = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
+   mem_fd, new_mem->fd_offset);
+  assert(new_mem->data != MAP_FAILED);
+
+  rb_tree_insert_at(&mem, node, &new_mem->node, cmp > 0);
+  node = &new_mem->node;
+   }
+
+   return rb_node_data(struct phys_mem, node, node);
+}
+
+static st

[Mesa-dev] [PATCH v4 6/7] mesa: add INTEL_blackhole_render

2018-06-20 Thread Lionel Landwerlin
v2: Implement missing Enable/Disable (Emil)

v3: Drop unused NewIntelBlackholeRender (Ken)

v4: Bring back NewIntelBlackholeRender as i965 implementation uses it
again (Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Kenneth Graunke 
---
 src/mesa/main/enable.c   | 14 ++
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/mtypes.h   |  8 
 3 files changed, 23 insertions(+)

diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index d1b2f3a9625..7b482bdf5db 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -1137,6 +1137,16 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, 
GLboolean state)
  ctx->Color.BlendCoherent = state;
  break;
 
+  case GL_BLACKHOLE_RENDER_INTEL:
+ if (!_mesa_has_INTEL_blackhole_render(ctx))
+goto invalid_enum_error;
+ if (ctx->IntelBlackholeRender == state)
+return;
+ FLUSH_VERTICES(ctx, 0);
+ ctx->NewDriverState |= ctx->DriverFlags.NewIntelBlackholeRender;
+ ctx->IntelBlackholeRender = state;
+ break;
+
   default:
  goto invalid_enum_error;
}
@@ -1776,6 +1786,10 @@ _mesa_IsEnabled( GLenum cap )
  CHECK_EXTENSION(MESA_tile_raster_order);
  return ctx->TileRasterOrderIncreasingY;
 
+  case GL_BLACKHOLE_RENDER_INTEL:
+ CHECK_EXTENSION(INTEL_blackhole_render);
+ return ctx->IntelBlackholeRender;
+
   default:
  goto invalid_enum_error;
}
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 1c55df8a228..cb41aa8be4a 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -309,6 +309,7 @@ EXT(IBM_texture_mirrored_repeat , dummy_true
 
 EXT(INGR_blend_func_separate, EXT_blend_func_separate  
  , GLL,  x ,  x ,  x , 1999)
 
+EXT(INTEL_blackhole_render  , INTEL_blackhole_render   
  ,  30,  30,  x , ES2, 2018)
 EXT(INTEL_conservative_rasterization, INTEL_conservative_rasterization 
  ,  x , GLC,  x ,  31, 2013)
 EXT(INTEL_performance_query , INTEL_performance_query  
  , GLL, GLC,  x , ES2, 2013)
 
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 220751ba7bb..605b4973f13 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -4240,6 +4240,7 @@ struct gl_extensions
GLboolean ATI_fragment_shader;
GLboolean ATI_separate_stencil;
GLboolean GREMEDY_string_marker;
+   GLboolean INTEL_blackhole_render;
GLboolean INTEL_conservative_rasterization;
GLboolean INTEL_performance_query;
GLboolean KHR_blend_equation_advanced;
@@ -4585,6 +4586,11 @@ struct gl_driver_flags
 
/** Programmable sample location state for gl_context::DrawBuffer */
uint64_t NewSampleLocations;
+
+   /**
+* gl_context::IntelBlackholeRender
+*/
+   uint64_t NewIntelBlackholeRender;
 };
 
 struct gl_buffer_binding
@@ -5006,6 +5012,8 @@ struct gl_context
GLfloat ConservativeRasterDilate;
GLenum16 ConservativeRasterMode;
 
+   GLboolean IntelBlackholeRender; /**< GL_INTEL_blackhole_render */
+
/** Does glVertexAttrib(0) alias glVertex()? */
bool _AttribZeroAliasesVertex;
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 1/7] i965: add force posted register load

2018-06-20 Thread Lionel Landwerlin
Inspired by what is already in the kernel.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_context.h   |  2 ++
 src/mesa/drivers/dri/i965/brw_defines.h   |  1 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 13 +
 3 files changed, 16 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 2613b9fda22..0880d18b6f0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1410,6 +1410,8 @@ void brw_store_register_mem64(struct brw_context *brw,
   struct brw_bo *bo, uint32_t reg, uint32_t 
offset);
 void brw_load_register_imm32(struct brw_context *brw,
  uint32_t reg, uint32_t imm);
+void brw_load_register_imm32_force_posted(struct brw_context *brw,
+  uint32_t reg, uint32_t imm);
 void brw_load_register_imm64(struct brw_context *brw,
  uint32_t reg, uint64_t imm);
 void brw_load_register_reg(struct brw_context *brw, uint32_t src,
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 855f1c7d744..320426d6944 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1428,6 +1428,7 @@ enum brw_pixel_shader_coverage_mask_mode {
 
 #define MI_STORE_DATA_IMM  (CMD_MI | (0x20 << 23))
 #define MI_LOAD_REGISTER_IMM   (CMD_MI | (0x22 << 23))
+#define  MI_LOAD_REGISTER_IMM_FORCE_POSTED  (1 << 12)
 #define MI_LOAD_REGISTER_REG   (CMD_MI | (0x2A << 23))
 
 #define MI_FLUSH_DW(CMD_MI | (0x26 << 23))
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index df999ffeb1d..250a8e812e5 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -1192,6 +1192,19 @@ brw_load_register_imm32(struct brw_context *brw, 
uint32_t reg, uint32_t imm)
ADVANCE_BATCH();
 }
 
+void
+brw_load_register_imm32_force_posted(struct brw_context *brw, uint32_t reg, 
uint32_t imm)
+{
+   assert(brw->screen->devinfo.gen >= 6);
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2) |
+ MI_LOAD_REGISTER_IMM_FORCE_POSTED);
+   OUT_BATCH(reg);
+   OUT_BATCH(imm);
+   ADVANCE_BATCH();
+}
+
 /*
  * Write a 64-bit register using immediate data.
  */
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 2/7] i965: add a skylake only pipe control recommendation

2018-06-20 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 122ac260703..1b89e55c396 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -158,6 +158,19 @@ brw_emit_pipe_control(struct brw_context *brw, uint32_t 
flags,
  }
   }
 
+  /* Project: SKL
+   *
+   * "PIPECONTROL command with “Command Streamer Stall Enable” must be
+   * programmed prior to programming a PIPECONTROL command with LRI Post
+   * Sync Operation in GPGPU mode of operation (i.e when PIPELINE_SELECT
+   * command is set to GPGPU mode of operation)."
+   */
+  if (devinfo->is_skylake &&
+  brw->last_pipeline == BRW_COMPUTE_PIPELINE &&
+  (flags & PIPE_CONTROL_LRI_WRITE_IMMEDIATE)) {
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+  }
+
   if (devinfo->gen == 10)
  gen10_add_rcpfe_workaround_bits();
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 3/7] i965: pipecontrol: allow NULL bo for writing registers

2018-06-20 Thread Lionel Landwerlin
When doing a LRI Post Sync operation, you can put the register offset
in the lower 32bits of the address but won't need a BO.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 1b89e55c396..874e9f0a9e8 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -179,6 +179,9 @@ brw_emit_pipe_control(struct brw_context *brw, uint32_t 
flags,
   OUT_BATCH(flags);
   if (bo) {
  OUT_RELOC64(bo, RELOC_WRITE, offset);
+  } else if ((flags & PIPE_CONTROL_LRI_WRITE_IMMEDIATE) != 0) {
+ OUT_BATCH(offset);
+ OUT_BATCH(0);
   } else {
  OUT_BATCH(0);
  OUT_BATCH(0);
@@ -210,6 +213,9 @@ brw_emit_pipe_control(struct brw_context *brw, uint32_t 
flags,
   OUT_BATCH(flags);
   if (bo) {
  OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, gen6_gtt | offset);
+  } else if (devinfo->gen >= 7 &&
+ (flags & PIPE_CONTROL_LRI_WRITE_IMMEDIATE) != 0) {
+ OUT_BATCH(offset);
   } else {
  OUT_BATCH(0);
   }
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 7/7] i965: enable INTEL_blackhole_render

2018-06-20 Thread Lionel Landwerlin
v2: condition the extension on context isolation support from the
kernel (Chris)

v3: (Lionel)

The initial version of this change used a feature of the Gen7+
command parser to turn the primitive instructions into no-ops.
Unfortunately this doesn't play well with how we're using the
hardware outside of the user submitted commands. For example,
resolves are implicit operations which should not be turned into
no-ops, because some of the previously submitted commands (before
blackhole_render is enabled) might not be disabled. Consider
this sequence:

   glClear();
   glEnable(GL_BLACKHOLE_RENDER_INTEL);
   glDrawArrays(...);
   glReadPixels(...);
   glDisable(GL_BLACKHOLE_RENDER_INTEL);

While clear has been emitted outside the blackhole render, it
should still be resolved properly in the read pixels. Hence we
need to be more selective and only disable user submitted
commands.

This v3 manually turns primitives into MI_NOOP if blackhole render
is enabled. This lets us enable this feature on any platform.

v4: Limit support to gen7.5+ (Lionel)

v5: Enable Gen7.5 support again, requires a kernel update of the
command parser (Lionel)

v6: Disable Gen7.5 again... Kernel devs want these patches landed
before they accept the kernel patches to whitelist INSTPM (Lionel)

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_clear.c |  3 +
 src/mesa/drivers/dri/i965/brw_context.h   |  2 +
 src/mesa/drivers/dri/i965/brw_defines.h   |  8 ++-
 src/mesa/drivers/dri/i965/brw_misc_state.c| 56 +++
 src/mesa/drivers/dri/i965/brw_state.h |  4 ++
 src/mesa/drivers/dri/i965/brw_state_upload.c  |  2 +
 src/mesa/drivers/dri/i965/genX_state_upload.c |  4 ++
 src/mesa/drivers/dri/i965/intel_extensions.c  |  8 +++
 src/mesa/drivers/dri/i965/intel_fbo.c |  6 ++
 src/mesa/drivers/dri/i965/intel_pixel_read.c  |  3 +
 src/mesa/drivers/dri/i965/intel_tex_copy.c|  3 +
 src/mesa/drivers/dri/i965/intel_tex_image.c   |  5 ++
 12 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index b097dfe346c..d3e360b3e23 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -247,6 +247,9 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
if (!_mesa_check_conditional_render(ctx))
   return;
 
+   if (ctx->IntelBlackholeRender)
+  return;
+
if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
   brw->front_buffer_dirty = true;
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 0880d18b6f0..23602df2138 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -218,6 +218,7 @@ enum brw_state_id {
BRW_STATE_CONSERVATIVE_RASTERIZATION,
BRW_STATE_DRAW_CALL,
BRW_STATE_AUX,
+   BRW_STATE_CS_NOOP,
BRW_NUM_STATE_BITS
 };
 
@@ -309,6 +310,7 @@ enum brw_state_id {
 #define BRW_NEW_CONSERVATIVE_RASTERIZATION (1ull << 
BRW_STATE_CONSERVATIVE_RASTERIZATION)
 #define BRW_NEW_DRAW_CALL   (1ull << BRW_STATE_DRAW_CALL)
 #define BRW_NEW_AUX_STATE   (1ull << BRW_STATE_AUX)
+#define BRW_NEW_CS_NOOP (1ull << BRW_STATE_CS_NOOP)
 
 struct brw_state_flags {
/** State update flags signalled by mesa internals */
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 320426d6944..4e2d6acc706 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1651,11 +1651,17 @@ enum brw_pixel_shader_coverage_mask_mode {
 #define GEN10_CACHE_MODE_SS0x0e420
 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
 
-#define INSTPM 0x20c0
+#define INSTPM 0x20c0 /* Gen6-8 */
 # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
+# define INSTPM_GLOBAL_DEBUG_ENABLE(1 << 4)
+# define INSTPM_MEDIA_INSTRUCTION_DISABLE  (1 << 3)
+# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE(1 << 2)
+# define INSTPM_3D_STATE_INSTRUCTION_DISABLE   (1 << 1)
 
 #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */
 # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
+# define CSDBG2_MEDIA_INSTRUCTION_DISABLE  (1 << 1)
+# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE(1 << 0)
 
 #define GEN7_RPSTAT1   0xA01C
 #define  GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT   7
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 9a663b1d61c..baf64757b93 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_sta

[Mesa-dev] [PATCH v4 4/7] intel: genxml: add Force Posted field to MI_LRI

2018-06-20 Thread Lionel Landwerlin
The kernel uses it. It's not recommended to use it in the batchbuffer,
but the hardware doesn't seem to complain.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/genxml/gen10.xml | 1 +
 src/intel/genxml/gen11.xml | 1 +
 src/intel/genxml/gen7.xml  | 1 +
 src/intel/genxml/gen75.xml | 1 +
 src/intel/genxml/gen8.xml  | 1 +
 src/intel/genxml/gen9.xml  | 1 +
 6 files changed, 6 insertions(+)

diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
index 541e4405716..e5e79d2c661 100644
--- a/src/intel/genxml/gen10.xml
+++ b/src/intel/genxml/gen10.xml
@@ -2974,6 +2974,7 @@
 
 
 
+
 
 
 
diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml
index bd3800e4b79..41b5bf099d5 100644
--- a/src/intel/genxml/gen11.xml
+++ b/src/intel/genxml/gen11.xml
@@ -2963,6 +2963,7 @@
 
 
 
+
 
 
 
diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml
index 6dde7973e69..e09e336d0cd 100644
--- a/src/intel/genxml/gen7.xml
+++ b/src/intel/genxml/gen7.xml
@@ -2018,6 +2018,7 @@
 
 
 
+
 
 
 
diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml
index 5b01fd45400..613168e6f45 100644
--- a/src/intel/genxml/gen75.xml
+++ b/src/intel/genxml/gen75.xml
@@ -2378,6 +2378,7 @@
 
 
 
+
 
 
 
diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml
index 4ed41d15612..c8320c6ed63 100644
--- a/src/intel/genxml/gen8.xml
+++ b/src/intel/genxml/gen8.xml
@@ -2605,6 +2605,7 @@
 
 
 
+
 
 
 
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index 318ae89d5e7..6181b893334 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -2892,6 +2892,7 @@
 
 
 
+
 
 
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 0/7] mesa/i965: Add support for INTEL_blackhole_render

2018-06-20 Thread Lionel Landwerlin
Hi all,

This is a respin of the blackhole render extension with Haswell
disabled. After digging a bit I found out that the kernel command
parser drops our batch when it contains a write to INSTPM.
Unfortunately I have to land those patches to be able to land the
kernel ones.

Cheers,

Lionel Landwerlin (7):
  i965: add force posted register load
  i965: add a skylake only pipe control recommendation
  i965: pipecontrol: allow NULL bo for writing registers
  intel: genxml: add Force Posted field to MI_LRI
  include: bump GL/GLES headers & registry
  mesa: add INTEL_blackhole_render
  i965: enable INTEL_blackhole_render

 include/GL/glcorearb.h|   52 +-
 include/GL/glext.h|   65 +-
 include/GL/glxext.h   |   20 +-
 include/GL/wglext.h   |6 +-
 include/GLES/gl.h |   15 +-
 include/GLES/glext.h  |   33 +-
 include/GLES2/gl2.h   |6 +-
 include/GLES2/gl2ext.h|  143 +-
 include/GLES3/gl3.h   |6 +-
 src/intel/genxml/gen10.xml|1 +
 src/intel/genxml/gen11.xml|1 +
 src/intel/genxml/gen7.xml |1 +
 src/intel/genxml/gen75.xml|1 +
 src/intel/genxml/gen8.xml |1 +
 src/intel/genxml/gen9.xml |1 +
 src/mapi/glapi/registry/gl.xml| 3995 -
 src/mesa/drivers/dri/i965/brw_clear.c |3 +
 src/mesa/drivers/dri/i965/brw_context.h   |4 +
 src/mesa/drivers/dri/i965/brw_defines.h   |9 +-
 src/mesa/drivers/dri/i965/brw_misc_state.c|   56 +
 src/mesa/drivers/dri/i965/brw_pipe_control.c  |   19 +
 src/mesa/drivers/dri/i965/brw_state.h |4 +
 src/mesa/drivers/dri/i965/brw_state_upload.c  |2 +
 src/mesa/drivers/dri/i965/genX_state_upload.c |4 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |   13 +
 src/mesa/drivers/dri/i965/intel_extensions.c  |8 +
 src/mesa/drivers/dri/i965/intel_fbo.c |6 +
 src/mesa/drivers/dri/i965/intel_pixel_read.c  |3 +
 src/mesa/drivers/dri/i965/intel_tex_copy.c|3 +
 src/mesa/drivers/dri/i965/intel_tex_image.c   |5 +
 src/mesa/main/enable.c|   14 +
 src/mesa/main/extensions_table.h  |1 +
 src/mesa/main/mtypes.h|8 +
 33 files changed, 3213 insertions(+), 1296 deletions(-)

--
2.17.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa 1/2] intel/aubinator: fix mem leak

2018-06-20 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 19/06/18 16:05, Eric Engestrom wrote:

Signed-off-by: Eric Engestrom 
---
  src/intel/tools/aubinator.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index c8d79ae377d68285bd1e..e4fb8adada354253143c 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -671,6 +671,7 @@ int main(int argc, char *argv[])
 /* close the stdout which is opened to write the output */
 close(1);
 free(xml_path);
+   free(file);
  
 wait(NULL);
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 07/16] intel/tools/aubinator: aubinate ppgtt aubs

2018-06-20 Thread Lionel Landwerlin

On 20/06/18 01:00, Rafael Antognolli wrote:

On Tue, Jun 19, 2018 at 02:45:22PM +0100, Lionel Landwerlin wrote:

From: Scott D Phillips 

v2: by Lionel
 Fix memfd_create compilation issue
 Fix pml4 address stored on 32 instead of 64bits
 Return no buffer if first ppgtt page is not mapped

Signed-off-by: Lionel Landwerlin 
---
  src/intel/tools/aubinator.c | 76 -
  1 file changed, 75 insertions(+), 1 deletion(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 962546d360c..3368ac521bd 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -336,6 +336,68 @@ get_ggtt_batch_bo(void *user_data, uint64_t address)
  
 return bo;

  }
+
+static struct phys_mem *
+ppgtt_walk(uint64_t pml4, uint64_t address)
+{
+   uint64_t shift = 39;
+   uint64_t addr = pml4;
+   for (int level = 4; level > 0; level--) {
+  struct phys_mem *table = search_phys_mem(addr);
+  if (!table)
+ return NULL;
+  int index = (address >> shift) & 0x1ff;
+  uint64_t entry = ((uint64_t *)table->data)[index];
+  if (!(entry & 1))
+ return NULL;
+  addr = entry & ~0xfff;
+  shift -= 9;
+   }
+   return search_phys_mem(addr);
+}
+
+static bool
+ppgtt_mapped(uint64_t pml4, uint64_t address)
+{
+   return ppgtt_walk(pml4, address) != NULL;
+}
+
+static struct gen_batch_decode_bo
+get_ppgtt_batch_bo(void *user_data, uint64_t address)
+{
+   struct gen_batch_decode_bo bo = {0};
+   uint64_t pml4 = *(uint64_t *)user_data;
+
+   address &= ~0xfff;
+
+   if (!ppgtt_mapped(pml4, address))
+  return bo;
+
+   /* Map everything until the first gap since we don't know how much the
+* decoder actually needs.
+*/
+   uint64_t end = address;
+   while (ppgtt_mapped(pml4, end))
+  end += 4096;
+
+   bo.addr = address;
+   bo.size = end - address;
+   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+   assert(bo.map != MAP_FAILED);
+
+   for (uint64_t page = address; page < end; page += 4096) {
+  struct phys_mem *phys_mem = ppgtt_walk(pml4, page);
+
+  void *res = mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ,
+   MAP_SHARED | MAP_FIXED, mem_fd, phys_mem->fd_offset);
+  assert(res != MAP_FAILED);
+   }
+
+   add_gtt_bo_map(bo, true);
+
+   return bo;
+}
+
  #define GEN_ENGINE_RENDER 1
  #define GEN_ENGINE_BLITTER 2
  
@@ -377,6 +439,7 @@ handle_trace_block(uint32_t *p)

}
  
(void)engine; /* TODO */

+  batch_ctx.get_bo = get_ggtt_batch_bo;
gen_print_batch(&batch_ctx, bo.map, bo.size, 0);
  
clear_bo_maps();

@@ -402,7 +465,7 @@ aubinator_init(uint16_t aub_pci_id, const char *app_name)
 batch_flags |= GEN_BATCH_DECODE_FLOATS;
  
 gen_batch_decode_ctx_init(&batch_ctx, &devinfo, outfile, batch_flags,

- xml_path, get_ggtt_batch_bo, NULL, NULL);
+ xml_path, NULL, NULL, NULL);
 batch_ctx.max_vbo_decoded_lines = max_vbo_lines;
  
 char *color = GREEN_HEADER, *reset_color = NORMAL;

@@ -542,12 +605,20 @@ handle_memtrace_reg_write(uint32_t *p)
 uint32_t ring_buffer_head = context[5];
 uint32_t ring_buffer_tail = context[7];
 uint32_t ring_buffer_start = context[9];
+   uint64_t pml4 = (uint64_t)context[49] << 32 | context[51];
  
 struct gen_batch_decode_bo ring_bo = get_ggtt_batch_bo(NULL,

ring_buffer_start);
 assert(ring_bo.size > 0);
 void *commands = (uint8_t *)ring_bo.map + (ring_bo.addr - 
ring_buffer_start);
  
+   if (context_descriptor & 0x100 /* ppgtt */) {

+  batch_ctx.get_bo = get_ppgtt_batch_bo;
+  batch_ctx.user_data = &pml4;
+   } else {
+  batch_ctx.get_bo = get_ggtt_batch_bo;
+   }
+
 (void)engine; /* TODO */
 gen_print_batch(&batch_ctx, commands, ring_buffer_tail - ring_buffer_head,
 0);
@@ -849,6 +920,9 @@ int main(int argc, char *argv[])
  
 list_inithead(&maps);
  
+   mem_fd = memfd_create("phys memory", 0);

+
+

It seems like this memfd_create() got duplicated here (it was added in
the previous patch).


Oops... Dropped locally.




 file = aub_file_open(input_file);
  
 while (aub_file_more_stuff(file) &&

--
2.17.1


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 02/16] intel: aubinator: remove standard input processing option

2018-06-20 Thread Lionel Landwerlin

On 19/06/18 23:56, Rafael Antognolli wrote:

On Tue, Jun 19, 2018 at 11:40:30AM -0700, Rafael Antognolli wrote:

On Tue, Jun 19, 2018 at 02:45:17PM +0100, Lionel Landwerlin wrote:

Now that we rely on mmap of the data to parse, we can't process the
standard input anymore.

Didn't we rely on mmap of the data since forever?

Oh, I think it's because of patch 04, right? If so, I think we need to
update the message to reflect that this is going to be changed in a
newer commit. And maybe explain it a little more, something like:

"On a follow up commit in this series, we stop copying the data from the
mmap'ed file into our big gtt mmap, and start referencing data in it
directly. So reallocating the read buffer and adding more data from
stdin wouldn't work. For that reason, let's stop supporting stdin
processing."

Or something like that, assuming I understood it correctly.

Anyway, this patch is

Reviewed-by: Rafael Antognolli 


Indeed, sorry for the confusion :(





This isn't much of a big deal because we have in-process batch decoder
(run with INTEL_DEBUG=batch) that supports essentially doing the same
thing.

Signed-off-by: Lionel Landwerlin 
---
  src/intel/tools/aubinator.c | 102 +---
  1 file changed, 12 insertions(+), 90 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 949ba96e556..3f9047e69a8 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -350,17 +350,6 @@ aub_file_open(const char *filename)
 return file;
  }
  
-static struct aub_file *

-aub_file_stdin(void)
-{
-   struct aub_file *file;
-
-   file = calloc(1, sizeof *file);
-   file->stream = stdin;
-
-   return file;
-}
-
  #define TYPE(dw)   (((dw) >> 29) & 7)
  #define OPCODE(dw) (((dw) >> 23) & 0x3f)
  #define SUBOPCODE(dw)  (((dw) >> 16) & 0x7f)
@@ -398,8 +387,7 @@ aub_file_decode_batch(struct aub_file *file)
 uint32_t *p, h, *new_cursor;
 int header_length, bias;
  
-   if (file->end - file->cursor < 1)

-  return AUB_ITEM_DECODE_NEED_MORE_DATA;
+   assert(file->cursor < file->end);
  
 p = file->cursor;

 h = *p;
@@ -421,13 +409,11 @@ aub_file_decode_batch(struct aub_file *file)
  
 new_cursor = p + header_length + bias;

 if ((h & 0xffff0000) == MAKE_HEADER(TYPE_AUB, OPCODE_AUB, 
SUBOPCODE_BLOCK)) {
-  if (file->end - file->cursor < 4)
- return AUB_ITEM_DECODE_NEED_MORE_DATA;
+  assert(file->end - file->cursor >= 4);
new_cursor += p[4] / 4;
 }
  
-   if (new_cursor > file->end)

-  return AUB_ITEM_DECODE_NEED_MORE_DATA;
+   assert(new_cursor <= file->end);
  
 switch (h & 0xffff0000) {

 case MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_HEADER):
@@ -468,48 +454,6 @@ aub_file_more_stuff(struct aub_file *file)
 return file->cursor < file->end || (file->stream && !feof(file->stream));
  }
  
-#define AUB_READ_BUFFER_SIZE (4096)

-#define MAX(a, b) ((a) < (b) ? (b) : (a))
-
-static void
-aub_file_data_grow(struct aub_file *file)
-{
-   size_t old_size = (file->mem_end - file->map) * 4;
-   size_t new_size = MAX(old_size * 2, AUB_READ_BUFFER_SIZE);
-   uint32_t *new_start = realloc(file->map, new_size);
-
-   file->cursor = new_start + (file->cursor - file->map);
-   file->end = new_start + (file->end - file->map);
-   file->map = new_start;
-   file->mem_end = file->map + (new_size / 4);
-}
-
-static bool
-aub_file_data_load(struct aub_file *file)
-{
-   size_t r;
-
-   if (file->stream == NULL)
-  return false;
-
-   /* First remove any consumed data */
-   if (file->cursor > file->map) {
-  memmove(file->map, file->cursor,
-  (file->end - file->cursor) * 4);
-  file->end -= file->cursor - file->map;
-  file->cursor = file->map;
-   }
-
-   /* Then load some new data in */
-   if ((file->mem_end - file->end) < (AUB_READ_BUFFER_SIZE / 4))
-  aub_file_data_grow(file);
-
-   r = fread(file->end, 1, (file->mem_end - file->end) * 4, file->stream);
-   file->end += r / 4;
-
-   return r != 0;
-}
-
  static void
  setup_pager(void)
  {
@@ -541,9 +485,8 @@ static void
  print_help(const char *progname, FILE *file)
  {
 fprintf(file,
-   "Usage: %s [OPTION]... [FILE]\n"
-   "Decode aub file contents from either FILE or the standard 
input.\n\n"
-   "A valid --gen option must be provided.\n\n"
+   "Usage: %s [OPTION]... FILE\n"
+   "Decode aub file contents from FILE.\n\n"
 "  --help display this help and exit\n"
 "  --gen=platform decode for given platform (3 letter platform 
name)\n"
 "  --headers  decode on

<    1   2   3   4   5   6   7   8   9   10   >