Re: [Mesa-dev] [PATCH v2] egl: check if colorspace/surface type is supported

2018-05-03 Thread Tapani Pälli



On 05/03/2018 08:49 PM, Emil Velikov wrote:

On 2 May 2018 at 17:23, Juan A. Suarez Romero  wrote:

According to EGL 1.4 spec, section 3.5.1 ("Creating On-Screen Rendering
Surfaces"), if config does not support the colorspace or alpha format
attributes specified in attrib_list (as defined for
eglCreateWindowSurface), an EGL_BAD_MATCH error is generated.

This fixes dEQP-EGL.functional.wide_color.*_888_colorspace_srgb (still
not merged,
https://android-review.googlesource.com/c/platform/external/deqp/+/667322),
which is crashing when trying to create a windows surface with RGB888
configuration and sRGB colorspace.

v2: Handle the fix in other backends (Tapani)
---
  src/egl/drivers/dri2/platform_drm.c  | 5 +
  src/egl/drivers/dri2/platform_wayland.c  | 6 ++
  src/egl/drivers/dri2/platform_x11.c  | 5 +
  src/egl/drivers/dri2/platform_x11_dri3.c | 5 +
  4 files changed, 21 insertions(+)

diff --git a/src/egl/drivers/dri2/platform_drm.c 
b/src/egl/drivers/dri2/platform_drm.c
index dc4efea9103..35bc4b5b1ac 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -155,6 +155,11 @@ dri2_drm_create_window_surface(_EGLDriver *drv, 
_EGLDisplay *disp,
 config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
  dri2_surf->base.GLColorspace);

+   if (!config) {
+  _eglError(EGL_BAD_MATCH, "Unsupported surfacetype/colorspace 
configuration");


Seems like android and surfaceless are missing the EGL_BAD_MATCH bit.
AFAICT they need it right?



That is true, I've sent a patch to add the error case.

BTW there seems something wrong in dEQP surfaceless support (?) Are you 
guys able to run it? For me it says EGL is not supported.


// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] egl: add EGL_BAD_MATCH error case for surfaceless and android

2018-05-03 Thread Tapani Pälli
Just like is done for other backends when suitable config is not
found (added in fd4eba4929).

Signed-off-by: Tapani Pälli 
---
 src/egl/drivers/dri2/platform_android.c | 4 +++-
 src/egl/drivers/dri2/platform_surfaceless.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_android.c 
b/src/egl/drivers/dri2/platform_android.c
index 7f1a496ea2..1d6ed92bd6 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -337,8 +337,10 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, 
EGLint type,
 
config = dri2_get_dri_config(dri2_conf, type,
 dri2_surf->base.GLColorspace);
-   if (!config)
+   if (!config) {
+  _eglError(EGL_BAD_MATCH, "Unsupported surfacetype/colorspace 
configuration");
   goto cleanup_surface;
+   }
 
if (dri2_dpy->image_driver)
   createNewDrawable = dri2_dpy->image_driver->createNewDrawable;
diff --git a/src/egl/drivers/dri2/platform_surfaceless.c 
b/src/egl/drivers/dri2/platform_surfaceless.c
index 70b302c0ce..a0348a5e95 100644
--- a/src/egl/drivers/dri2/platform_surfaceless.c
+++ b/src/egl/drivers/dri2/platform_surfaceless.c
@@ -130,8 +130,10 @@ dri2_surfaceless_create_surface(_EGLDriver *drv, 
_EGLDisplay *disp, EGLint type,
config = dri2_get_dri_config(dri2_conf, type,
 dri2_surf->base.GLColorspace);
 
-   if (!config)
+   if (!config) {
+  _eglError(EGL_BAD_MATCH, "Unsupported surfacetype/colorspace 
configuration");
   goto cleanup_surface;
+   }
 
dri2_surf->dri_drawable =
   dri2_dpy->image_driver->createNewDrawable(dri2_dpy->dri_screen, config,
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 8/9] anv: elide relocations to pinned target bos

2018-05-03 Thread Jason Ekstrand
If I'm honest, I don't really like the way this patch worked out.  It has
the virtue of being fairly simple and a nice incremental change.  However,
it seems to me like we should be able to do better.  That said, I don't
really know how off-hand and this looks correct and gets us going.

On Wed, May 2, 2018 at 9:01 AM, Scott D Phillips  wrote:

> References to pinned bos won't need relocated, so just write the
> final value of the reference into the bo. Add a `set` to the
> relocation lists for tracking dependencies that were previously
> tracked by relocations.
> ---
>  src/intel/vulkan/anv_batch_chain.c | 36 ++
> ++
>  src/intel/vulkan/anv_private.h |  3 +++
>  2 files changed, 39 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_batch_chain.c
> b/src/intel/vulkan/anv_batch_chain.c
> index 52f69045519..a4856376d8d 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -75,11 +75,24 @@ anv_reloc_list_init_clone(struct anv_reloc_list *list,
>return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> }
>
> +   list->deps = _mesa_set_create(NULL, _mesa_hash_pointer,
> + _mesa_key_pointer_equal);
> +
> +   if (!list->deps) {
> +  vk_free(alloc, list->relocs);
> +  vk_free(alloc, list->reloc_bos);
> +  return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> +   }
> +
> if (other_list) {
>memcpy(list->relocs, other_list->relocs,
>   list->array_length * sizeof(*list->relocs));
>memcpy(list->reloc_bos, other_list->reloc_bos,
>   list->array_length * sizeof(*list->reloc_bos));
> +  struct set_entry *entry;
> +  set_foreach(other_list->deps, entry) {
> + _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
> +  }
> }
>
> return VK_SUCCESS;
> @@ -98,6 +111,7 @@ anv_reloc_list_finish(struct anv_reloc_list *list,
>  {
> vk_free(alloc, list->relocs);
> vk_free(alloc, list->reloc_bos);
> +   _mesa_set_destroy(list->deps, NULL);
>  }
>
>  static VkResult
> @@ -148,6 +162,11 @@ anv_reloc_list_add(struct anv_reloc_list *list,
> struct drm_i915_gem_relocation_entry *entry;
> int index;
>
> +   if (target_bo->flags & EXEC_OBJECT_PINNED) {
> +  _mesa_set_add(list->deps, target_bo);
> +  return VK_SUCCESS;
> +   }
> +
> VkResult result = anv_reloc_list_grow(list, alloc, 1);
> if (result != VK_SUCCESS)
>return result;
> @@ -185,6 +204,12 @@ anv_reloc_list_append(struct anv_reloc_list *list,
>list->relocs[i + list->num_relocs].offset += offset;
>
> list->num_relocs += other->num_relocs;
> +
> +   struct set_entry *entry;
> +   set_foreach(other->deps, entry) {
> +  _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
> +   }
> +
> return VK_SUCCESS;
>  }
>
> @@ -338,6 +363,7 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct
> anv_batch *batch,
> batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
> batch->relocs = >relocs;
> bbo->relocs.num_relocs = 0;
> +   _mesa_set_clear(bbo->relocs.deps, NULL);
>  }
>
>  static void
> @@ -785,6 +811,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct
> anv_cmd_buffer *cmd_buffer)
> cmd_buffer->bt_next = 0;
>
> cmd_buffer->surface_relocs.num_relocs = 0;
> +   _mesa_set_clear(cmd_buffer->surface_relocs.deps, NULL);
> cmd_buffer->last_ss_pool_center = 0;
>
> /* Reset the list of seen buffers */
> @@ -1070,6 +1097,15 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
>   if (result != VK_SUCCESS)
>  return result;
>}
> +
> +  struct set_entry *entry;
> +  set_foreach(relocs->deps, entry) {
> + VkResult result = anv_execbuf_add_bo(exec, entry->key, NULL,
> +  extra_flags, alloc);
>

Usually, when walking hash sets, it's good to sort them by something (such
as GEM handle) so we get some amount of determinism.  I'd like to think it
doesn't matter but I know better. :-)


> +
> + if (result != VK_SUCCESS)
> +return result;
> +  }
> }
>
> return VK_SUCCESS;
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> index 81d50b3e770..3a448e41bae 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -46,7 +46,9 @@
>  #include "blorp/blorp.h"
>  #include "compiler/brw_compiler.h"
>  #include "util/macros.h"
> +#include "util/hash_table.h"
>
 #include "util/list.h"
> +#include "util/set.h"
>  #include "util/u_atomic.h"
>  #include "util/u_vector.h"
>  #include "util/vma.h"
> @@ -1047,6 +1049,7 @@ struct anv_reloc_list {
> uint32_t array_length;
> struct drm_i915_gem_relocation_entry *   relocs;
> struct anv_bo ** reloc_bos;
> +   struct set * deps;
>  };
>
>  VkResult 

Re: [Mesa-dev] [PATCH 02/10] util: Add a virtual memory allocator

2018-05-03 Thread Jason Ekstrand
On Thu, May 3, 2018 at 6:12 PM, Kenneth Graunke 
wrote:

> From: Jason Ekstrand 
>
> This is simple linear-walk first-fit allocator roughly based on the
> allocator in the radeon winsys code.  This allocator has two primary
> functional differences:
>
>  1) It cleanly returns 0 on allocation failure
>
>  2) It allocates addresses top-down instead of bottom-up.
>
> The second one is needed for Intel because high addresses (with bit 47
> set) need to be canonicalized in order to work properly.  If we allocate
> bottom-up, then high addresses will be very rare (if they ever happen).
> We'd rather always have high addresses so that the canonicalization code
> gets better testing.
> ---
>  src/util/Makefile.sources |   4 +-
>  src/util/meson.build  |   2 +
>  src/util/vma.c| 232 ++
>  src/util/vma.h|  53 +
>  4 files changed, 290 insertions(+), 1 deletion(-)
>  create mode 100644 src/util/vma.c
>  create mode 100644 src/util/vma.h
>
> This is the exact same patch as Scott recently sent:
> https://patchwork.freedesktop.org/patch/219951/
>
> It may make more sense to review it on that existing thread.  I mostly
> resent it here to note that it's required for this series.
>

You could review it. :-P


> diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
> index 104ecae8ed3..534520ce763 100644
> --- a/src/util/Makefile.sources
> +++ b/src/util/Makefile.sources
> @@ -56,7 +56,9 @@ MESA_UTIL_FILES := \
> u_string.h \
> u_thread.h \
> u_vector.c \
> -   u_vector.h
> +   u_vector.h \
> +   vma.c \
> +   vma.h
>
>  MESA_UTIL_GENERATED_FILES = \
> format_srgb.c
> diff --git a/src/util/meson.build b/src/util/meson.build
> index eece1cefef6..14660e0fa0c 100644
> --- a/src/util/meson.build
> +++ b/src/util/meson.build
> @@ -81,6 +81,8 @@ files_mesa_util = files(
>'u_thread.h',
>'u_vector.c',
>'u_vector.h',
> +  'vma.c',
> +  'vma.h',
>  )
>
>  install_data('drirc', install_dir : get_option('sysconfdir'))
> diff --git a/src/util/vma.c b/src/util/vma.c
> new file mode 100644
> index 000..3d61f6969ed
> --- /dev/null
> +++ b/src/util/vma.c
> @@ -0,0 +1,232 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> next
> + * paragraph) shall be included in all copies or substantial portions of
> the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include 
> +
> +#include "util/u_math.h"
> +#include "util/vma.h"
> +
> +struct util_vma_hole {
> +   struct list_head link;
> +   uint64_t offset;
> +   uint64_t size;
> +};
> +
> +#define util_vma_foreach_hole(_hole, _heap) \
> +   list_for_each_entry(struct util_vma_hole, _hole, &(_heap)->holes, link)
> +
> +#define util_vma_foreach_hole_safe(_hole, _heap) \
> +   list_for_each_entry_safe(struct util_vma_hole, _hole,
> &(_heap)->holes, link)
> +
> +void
> +util_vma_heap_init(struct util_vma_heap *heap,
> +   uint64_t start, uint64_t size)
> +{
> +   list_inithead(>holes);
> +   util_vma_heap_free(heap, start, size);
> +}
> +
> +void
> +util_vma_heap_finish(struct util_vma_heap *heap)
> +{
> +   util_vma_foreach_hole_safe(hole, heap)
> +  free(hole);
> +}
> +
> +static void
> +util_vma_heap_validate(struct util_vma_heap *heap)
> +{
> +   uint64_t prev_offset = 0;
> +   util_vma_foreach_hole(hole, heap) {
> +  assert(hole->offset > 0);
> +  assert(hole->size > 0);
> +
> +  if (>link == heap->holes.next) {
> + /* This must be the top-most hole.  Assert that, if it
> overflows, it
> +  * overflows to 0, i.e. 2^64.
> +  */
> + assert(hole->size + hole->offset == 0 ||
> +hole->size + hole->offset > hole->offset);
> +  } else {
> + /* This is not the top-most hole so it must not overflow and, in
> +  * fact, must be strictly lower than the top-most 

Re: [Mesa-dev] [PATCH 7/9] anv: use a separate pool for binding tables when soft pinning

2018-05-03 Thread Jason Ekstrand
On Wed, May 2, 2018 at 9:01 AM, Scott D Phillips  wrote:

> Soft pinning lets us satisfy the binding table address
> requirements without using both sides of a growing state_pool.
>
> If you do use both sides of a state pool, then you need to read
> the state pool's center_bo_offset (with the device mutex held) to
> know the final offset of relocations that target the state pool
> bo.
>
> By having a separate pool for binding tables that only grows in
> the forward direction, the center_bo_offset is always 0 and
> relocations don't need an update pass to adjust relocations with
> the mutex held.
> ---
>  src/intel/vulkan/anv_batch_chain.c | 23 +++
>  src/intel/vulkan/anv_device.c  | 28 +++-
>  src/intel/vulkan/anv_private.h | 36 ++
> ++
>  3 files changed, 78 insertions(+), 9 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_batch_chain.c
> b/src/intel/vulkan/anv_batch_chain.c
> index 09514c7b84a..52f69045519 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -452,7 +452,7 @@ anv_cmd_buffer_surface_base_address(struct
> anv_cmd_buffer *cmd_buffer)
>  {
> struct anv_state *bt_block = u_vector_head(_buffer->bt_
> block_states);
> return (struct anv_address) {
> -  .bo = _buffer->device->surface_state_pool.block_pool.bo,
> +  .bo = anv_binding_table_pool_bo(cmd_buffer->device),
>.offset = bt_block->offset,
> };
>  }
> @@ -619,7 +619,8 @@ struct anv_state
>  anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
> uint32_t entries, uint32_t
> *state_offset)
>  {
> -   struct anv_state_pool *state_pool = _buffer->device->surface_
> state_pool;
> +   struct anv_device *device = cmd_buffer->device;
> +   struct anv_state_pool *state_pool = >surface_state_pool;
> struct anv_state *bt_block = u_vector_head(_buffer->bt_
> block_states);
> struct anv_state state;
>
> @@ -629,12 +630,18 @@ anv_cmd_buffer_alloc_binding_table(struct
> anv_cmd_buffer *cmd_buffer,
>return (struct anv_state) { 0 };
>
> state.offset = cmd_buffer->bt_next;
> -   state.map = state_pool->block_pool.map + bt_block->offset +
> state.offset;
> +   state.map = anv_binding_table_pool_map(device) + bt_block->offset +
> +  state.offset;
>
> cmd_buffer->bt_next += state.alloc_size;
>
> -   assert(bt_block->offset < 0);
> -   *state_offset = -bt_block->offset;
> +   if (device->use_separate_binding_table_pool) {
> +  *state_offset = device->surface_state_pool.block_pool.offset -
> + device->binding_table_pool.block_pool.offset - bt_block->offset;
> +   } else {
> +  assert(bt_block->offset < 0);
> +  *state_offset = -bt_block->offset;
> +   }
>
> return state;
>  }
> @@ -666,7 +673,7 @@ anv_cmd_buffer_new_binding_table_block(struct
> anv_cmd_buffer *cmd_buffer)
>return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> }
>
> -   *bt_block = anv_state_pool_alloc_back(state_pool);
> +   *bt_block = anv_binding_table_pool_alloc(cmd_buffer->device);
> cmd_buffer->bt_next = 0;
>
> return VK_SUCCESS;
> @@ -740,7 +747,7 @@ anv_cmd_buffer_fini_batch_bo_chain(struct
> anv_cmd_buffer *cmd_buffer)
>  {
> struct anv_state *bt_block;
> u_vector_foreach(bt_block, _buffer->bt_block_states)
> -  anv_state_pool_free(_buffer->device->surface_state_pool,
> *bt_block);
> +  anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
> u_vector_finish(_buffer->bt_block_states);
>
> anv_reloc_list_finish(_buffer->surface_relocs,
> _buffer->pool->alloc);
> @@ -772,7 +779,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct
> anv_cmd_buffer *cmd_buffer)
>
> while (u_vector_length(_buffer->bt_block_states) > 1) {
>struct anv_state *bt_block = u_vector_remove(_buffer->
> bt_block_states);
> -  anv_state_pool_free(_buffer->device->surface_state_pool,
> *bt_block);
> +  anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
> }
> assert(u_vector_length(_buffer->bt_block_states) == 1);
> cmd_buffer->bt_next = 0;
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 2837d2f83ca..d1f57081b77 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1637,9 +1637,32 @@ VkResult anv_CreateDevice(
> if (result != VK_SUCCESS)
>goto fail_instruction_state_pool;
>
> +   device->use_separate_binding_table_pool = physical_device->has_exec_
> softpin;
> +
> +   if (device->use_separate_binding_table_pool) {
> +  result = anv_state_pool_init(>binding_table_pool, device,
> 4096,
> +   bo_flags);
> +  if (result != VK_SUCCESS)
> + goto fail_surface_state_pool;
> +
> +  if (device->surface_state_pool.block_pool.offset <
> +  device->binding_table_pool.block_pool.offset) {
> +
> + uint64_t tmp;
> + 

Re: [Mesa-dev] [PATCH] mesa/main/readpix: Correct handling of packed floating point values

2018-05-03 Thread Gurchetan Singh
For those wondering why the dEQP CTS tests this fixes (for example
dEQP-GLES3.functional.fragment_out.basic.float.r11f_g11f_b10f_lowp_float)
pass outside the VM, it seems it's because the test cases read pixels
with type = GL_FLOAT.  virglrenderer always sets type =
GL_UNSIGNED_INT_10F_11F_11F_REV on the host when reading back
GL_R11F_G11F_B10F and probably converts to GL_FLOAT on the guest.  I
guess nobody, even conformance suites, wanted to deal with
GL_UNSIGNED_INT_10F_11F_11F_REV ...

Reviewed-by: Gurchetan Singh 
(don't have commit access so hopefully someone merges this)

On Fri, Apr 27, 2018 at 9:04 AM, Gert Wollny  wrote:
> From: Gert Wollny 
>
> Make sure that clamping in the pixel transfer operations is enabled/disabled
> for packed floating point values just like it is done for single normal and
> half precision floating point values.
>
> This fixes a series of CTS tests with virgl that use r11f_g11f_b10f
> buffers as target, and where virglrenderer reads these surfaces back
> using the format GL_UNSIGNED_INT_10F_11F_11F_REV.
>
> Signed-off-by: Gert Wollny 
> ---
> PS: I don't have write permissions to mesa git
>
>  src/mesa/main/readpix.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
> index a4eea3043d..c5fc66988b 100644
> --- a/src/mesa/main/readpix.c
> +++ b/src/mesa/main/readpix.c
> @@ -106,7 +106,8 @@ _mesa_get_readpixels_transfer_ops(const struct gl_context 
> *ctx,
>/* For blit-based ReadPixels packing, the clamping is done 
> automatically
> * unless the type is float. */
>if (_mesa_get_clamp_read_color(ctx, ctx->ReadBuffer) &&
> -  (type == GL_FLOAT || type == GL_HALF_FLOAT)) {
> +  (type == GL_FLOAT || type == GL_HALF_FLOAT ||
> +   type == GL_UNSIGNED_INT_10F_11F_11F_REV)) {
>   transferOps |= IMAGE_CLAMP_BIT;
>}
> }
> @@ -114,7 +115,8 @@ _mesa_get_readpixels_transfer_ops(const struct gl_context 
> *ctx,
>/* For CPU-based ReadPixels packing, the clamping must always be done
> * for non-float types, */
>if (_mesa_get_clamp_read_color(ctx, ctx->ReadBuffer) ||
> -  (type != GL_FLOAT && type != GL_HALF_FLOAT)) {
> +  (type != GL_FLOAT && type != GL_HALF_FLOAT &&
> +   type != GL_UNSIGNED_INT_10F_11F_11F_REV)) {
>   transferOps |= IMAGE_CLAMP_BIT;
>}
> }
> --
> 2.17.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/16] ac: enable both RBs on Kaveri

2018-05-03 Thread Marek Olšák
From: Marek Olšák 

This can result in 2x increase in performance on non-harvested Kaveris.

v2: don't do it on radeon

Tested-by: Michel Dänzer 
---
 src/amd/common/ac_gpu_info.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 031fd183b6f..8147fb0bbc3 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -309,21 +309,26 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
info->uvd_enc_supported =
uvd_enc.available_rings ? true : false;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
info->has_syncobj_wait_for_submit = info->has_syncobj && 
info->drm_minor >= 20;
info->has_fence_to_handle = info->has_syncobj && info->drm_minor >= 21;
info->has_ctx_priority = info->drm_minor >= 22;
/* TODO: Enable this once the kernel handles it efficiently. */
info->has_local_buffers = info->drm_minor >= 20 &&
  !info->has_dedicated_vram;
+
info->num_render_backends = amdinfo->rb_pipes;
+   /* The value returned by the kernel driver was wrong. */
+   if (info->family == CHIP_KAVERI)
+   info->num_render_backends = 2;
+
info->clock_crystal_freq = amdinfo->gpu_counter_freq;
if (!info->clock_crystal_freq) {
fprintf(stderr, "amdgpu: clock crystal frequency is 0, 
timestamps will be wrong\n");
info->clock_crystal_freq = 1;
}
info->tcc_cache_line_size = 64; /* TC L2 line size on GCN */
info->gb_addr_config = amdinfo->gb_addr_cfg;
if (info->chip_class == GFX9) {
info->num_tile_pipes = 1 << 
G_0098F8_NUM_PIPES(amdinfo->gb_addr_cfg);
info->pipe_interleave_bytes =
@@ -620,37 +625,43 @@ ac_get_raster_config(struct radeon_info *info,
raster_config = 0x;
else
raster_config = 0x0002;
raster_config_1 = 0x;
break;
case CHIP_CARRIZO:
raster_config = 0x0002;
raster_config_1 = 0x;
break;
case CHIP_KAVERI:
-   /* KV should be 0x0002, but that causes problems with 
radeon */
-   raster_config = 0x; /* 0x0002 */
+   raster_config = 0x0002;
raster_config_1 = 0x;
break;
case CHIP_KABINI:
case CHIP_MULLINS:
case CHIP_STONEY:
raster_config = 0x;
raster_config_1 = 0x;
break;
default:
fprintf(stderr,
"ac: Unknown GPU, using 0 for raster_config\n");
raster_config = 0x;
raster_config_1 = 0x;
break;
}
+
+   /* drm/radeon on Kaveri is buggy, so disable 1 RB to work around it.
+* This decreases performance by up to 50% when the RB is the 
bottleneck.
+*/
+   if (info->family == CHIP_KAVERI && info->drm_major == 2)
+   raster_config = 0x;
+
*raster_config_p = raster_config;
*raster_config_1_p = raster_config_1;
 }
 
 void
 ac_get_harvested_configs(struct radeon_info *info,
 unsigned raster_config,
 unsigned *cik_raster_config_1_p,
 unsigned *raster_config_se)
 {
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Ryzen 2500U lockup, HP Envy 360

2018-05-03 Thread Jerry DeLisle

Hi folks,

I search a thread earlier that identified that someone could reproduce 
this problem.


I don't have skills or tools to debug this, though I would not mind 
learning. I do other Open Software (gcc-gfortran)


Anyway, when I followed the thread, it was not clear if a fix had been 
found yet.


I am running Fedora 28 and today's updates included mesa 18.0.1-2 which 
I have confirmed does not fix the issue. Is there a later version that I 
should try?  Any workarounds?


Regards,

Jerry
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/10] i965: Introduce a "memory zone" concept on BO allocation.

2018-05-03 Thread Kenneth Graunke
We're planning to start managing the PPGTT in userspace in the near
future, rather than relying on the kernel to assign addresses.  While
most buffers can go anywhere, some need to be restricted to within 4GB
of a base address.

This commit adds a "memory zone" parameter to the BO allocation
functions, which lets the caller specify which base address the BO will
be associated with, or BRW_MEMZONE_OTHER for the full 48-bit VMA.
---
 src/mesa/drivers/dri/i965/brw_blorp.c |  3 +-
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 20 ++
 src/mesa/drivers/dri/i965/brw_bufmgr.h| 40 ++-
 src/mesa/drivers/dri/i965/brw_context.h   |  1 +
 .../drivers/dri/i965/brw_performance_query.c  |  5 ++-
 src/mesa/drivers/dri/i965/brw_pipe_control.c  |  3 +-
 src/mesa/drivers/dri/i965/brw_program.c   |  8 ++--
 src/mesa/drivers/dri/i965/brw_program_cache.c |  6 ++-
 src/mesa/drivers/dri/i965/brw_queryobj.c  |  8 ++--
 src/mesa/drivers/dri/i965/gen6_queryobj.c |  3 +-
 src/mesa/drivers/dri/i965/gen6_sol.c  |  6 ++-
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 15 ---
 .../drivers/dri/i965/intel_buffer_objects.c   |  8 ++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  7 +++-
 src/mesa/drivers/dri/i965/intel_screen.c  |  8 ++--
 src/mesa/drivers/dri/i965/intel_upload.c  |  3 +-
 16 files changed, 107 insertions(+), 37 deletions(-)

For what it's worth, I have a prototype that has separate memzones
for Instruction Base Address, Surface State Base Address, Dynamic State
Base Address, and "other" for the rest of the VMA.  It's worked out very
nicely, so I feel pretty confident that this is a good approach.

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index ba14136edc6..44394be0781 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -817,7 +817,8 @@ blorp_get_client_bo(struct brw_context *brw,
* data which we need to copy into a BO.
*/
   struct brw_bo *bo =
- brw_bo_alloc(brw->bufmgr, "tmp_tex_subimage_src", size);
+ brw_bo_alloc(brw->bufmgr, "tmp_tex_subimage_src", size,
+  BRW_MEMZONE_OTHER);
   if (bo == NULL) {
  perf_debug("intel_texsubimage: temp bo creation failed: size = %u\n",
 size);
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 66f30a1637f..66828f319be 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -268,6 +268,7 @@ static struct brw_bo *
 bo_alloc_internal(struct brw_bufmgr *bufmgr,
   const char *name,
   uint64_t size,
+  enum brw_memory_zone memzone,
   unsigned flags,
   uint32_t tiling_mode,
   uint32_t stride)
@@ -426,23 +427,27 @@ err:
 
 struct brw_bo *
 brw_bo_alloc(struct brw_bufmgr *bufmgr,
- const char *name, uint64_t size)
+ const char *name, uint64_t size,
+ enum brw_memory_zone memzone)
 {
-   return bo_alloc_internal(bufmgr, name, size, 0, I915_TILING_NONE, 0);
+   return bo_alloc_internal(bufmgr, name, size, memzone,
+0, I915_TILING_NONE, 0);
 }
 
 struct brw_bo *
 brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, const char *name,
-   uint64_t size, uint32_t tiling_mode, uint32_t pitch,
+   uint64_t size, enum brw_memory_zone memzone,
+   uint32_t tiling_mode, uint32_t pitch,
unsigned flags)
 {
-   return bo_alloc_internal(bufmgr, name, size, flags, tiling_mode, pitch);
+   return bo_alloc_internal(bufmgr, name, size, memzone,
+flags, tiling_mode, pitch);
 }
 
 struct brw_bo *
 brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name,
-  int x, int y, int cpp, uint32_t tiling,
-  uint32_t *pitch, unsigned flags)
+  int x, int y, int cpp, enum brw_memory_zone memzone,
+  uint32_t tiling, uint32_t *pitch, unsigned flags)
 {
uint64_t size;
uint32_t stride;
@@ -477,7 +482,8 @@ brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char 
*name,
if (tiling == I915_TILING_NONE)
   stride = 0;
 
-   return bo_alloc_internal(bufmgr, name, size, flags, tiling, stride);
+   return bo_alloc_internal(bufmgr, name, size, flags,
+memzone, tiling, stride);
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 68f5e0c2c85..9ad129744b2 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -47,6 +47,42 @@ extern "C" {
 struct gen_device_info;
 struct brw_context;
 
+/**
+ * Memory zones.  When allocating a buffer, you can request that it is
+ * placed into a specific region of the 

[Mesa-dev] [PATCH 07/10] i965: Prepare batchbuffer module for softpin support.

2018-05-03 Thread Kenneth Graunke
If EXEC_OBJECT_PINNED is set, we don't want to emit any relocations.
We simply want to add the BO to the validation list, and possibly mark
it as writeable.  The new brw_use_pinned_bo() interface does just that.

To avoid having to make every caller consider both the relocation and
softpin cases, we make emit_reloc() call brw_use_pinned_bo() when given
a softpinned buffer.

We also can't grow buffers that are softpinned - the mechanism places a
larger BO at the same offset as the original, which requires moving BOs
around in the VMA.  With softpin, we only allocate enough VMA for the
original size of the BO.
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 40 +--
 src/mesa/drivers/dri/i965/intel_batchbuffer.h |  4 ++
 2 files changed, 40 insertions(+), 4 deletions(-)

Overallocating is gross, but I have to make incremental progress somehow.
For batch buffers, the ultimate plan is to switch from growing to batch
chaining (just create another batch and MI_BATCH_BUFFER_START to GOTO
the new batch and carry on).  We can do that on Gen8+.  It's easier to
do that in the softpin world - otherwise, we'd need a third set of
relocation lists, which gets messier.

For state buffers, the plan is to set Dynamic State Base Address to a
fixed 4GB region of the VMA, then just use intel_upload.c to make
however many buffers we need, allocated out of that memzone.  Being able
to fix BO addresses within 4GB of the base address eliminates the need
to force all state to be in a single BO, and gives us a lot more
flexibility with less magic required.

But again...can't convert everything overnight, especially when having
to care about older hardware and ancient kernels.

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index fe1ea02ca41..96f1a238c0d 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -232,6 +232,10 @@ recreate_growing_buffer(struct brw_context *brw,
struct intel_batchbuffer *batch = >batch;
struct brw_bufmgr *bufmgr = screen->bufmgr;
 
+   /* We can't grow buffers when using softpin, so just overallocate them. */
+   if (brw_using_softpin(bufmgr))
+  size *= 2;
+
grow->bo = brw_bo_alloc(bufmgr, name, size, memzone);
grow->bo->kflags |= can_do_exec_capture(screen) ? EXEC_OBJECT_CAPTURE : 0;
grow->partial_bo = NULL;
@@ -389,6 +393,13 @@ grow_buffer(struct brw_context *brw,
struct brw_bufmgr *bufmgr = brw->bufmgr;
struct brw_bo *bo = grow->bo;
 
+   /* We can't grow buffers that are softpinned, as the growing mechanism
+* involves putting a larger buffer at the same gtt_offset...and we've
+* only allocated the smaller amount of VMA.  Without relocations, this
+* simply won't work.  This should never happen, however.
+*/
+   assert(!(bo->kflags & EXEC_OBJECT_PINNED));
+
perf_debug("Growing %s - ran out of space\n", bo->name);
 
if (grow->partial_bo) {
@@ -730,7 +741,8 @@ execbuffer(int fd,
   bo->index = -1;
 
   /* Update brw_bo::gtt_offset */
-  if (batch->validation_list[i].offset != bo->gtt_offset) {
+  if (!(bo->kflags & EXEC_OBJECT_PINNED) &&
+  batch->validation_list[i].offset != bo->gtt_offset) {
  DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%llx\n",
  bo->gem_handle, bo->gtt_offset,
  batch->validation_list[i].offset);
@@ -933,6 +945,14 @@ emit_reloc(struct intel_batchbuffer *batch,
 {
assert(target != NULL);
 
+   if (target->kflags & EXEC_OBJECT_PINNED) {
+  brw_use_pinned_bo(batch, target, reloc_flags & RELOC_WRITE);
+  return target->gtt_offset + target_offset;
+   }
+
+   unsigned int index = add_exec_bo(batch, target);
+   struct drm_i915_gem_exec_object2 *entry = >validation_list[index];
+
if (rlist->reloc_count == rlist->reloc_array_size) {
   rlist->reloc_array_size *= 2;
   rlist->relocs = realloc(rlist->relocs,
@@ -940,9 +960,6 @@ emit_reloc(struct intel_batchbuffer *batch,
   sizeof(struct drm_i915_gem_relocation_entry));
}
 
-   unsigned int index = add_exec_bo(batch, target);
-   struct drm_i915_gem_exec_object2 *entry = >validation_list[index];
-
if (reloc_flags & RELOC_32BIT) {
   /* Restrict this buffer to the low 32 bits of the address space.
*
@@ -976,6 +993,21 @@ emit_reloc(struct intel_batchbuffer *batch,
return entry->offset + target_offset;
 }
 
+void
+brw_use_pinned_bo(struct intel_batchbuffer *batch, struct brw_bo *bo,
+  unsigned writable_flag)
+{
+   assert(bo->kflags & EXEC_OBJECT_PINNED);
+   assert((writable_flag & ~EXEC_OBJECT_WRITE) == 0);
+
+   unsigned int index = add_exec_bo(batch, bo);
+   struct drm_i915_gem_exec_object2 *entry = >validation_list[index];
+   assert(entry->offset == bo->gtt_offset);
+
+   if (writable_flag)
+  entry->flags |= EXEC_OBJECT_WRITE;
+}
+
 uint64_t
 brw_batch_reloc(struct intel_batchbuffer *batch, 

[Mesa-dev] [PATCH 08/10] i965: Emit VF cache invalidates for 48-bit addressing bugs with softpin.

2018-05-03 Thread Kenneth Graunke
We'd like to start using soft-pin to assign BO addresses up front, and
never move them again.  Our previous plan for dealing with 48-bit VF
cache bugs was to relocate vertex buffers to the low 4GB, so we'd never
have addresses that alias in the low 32 bits.  But that requires moving
buffers dynamically.

This patch tracks the last seen BO address for each vertex/index buffer,
and emits a VF cache invalidate if the high bits change.  (Ideally, we
won't hit this case very often.)  This should work for the soft-pin
case, but unfortunately won't work in the relocation case, as we don't
actually know the addresses.  So, we have to use both methods.
---
 src/mesa/drivers/dri/i965/brw_context.h   |  6 ++
 src/mesa/drivers/dri/i965/genX_state_upload.c | 62 +++
 2 files changed, 68 insertions(+)

Migration is nice at times, but keeping everything static is also really
nice for pre-baking states...hard to know exactly what to do here.  It
would be nice if we could allocate all GL buffer objects that might be
VBOs out of the same 4GB, but that's...difficult to know, and might be
too limiting.  *shrug*

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index fb637e22281..7d6aa1a9c51 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -983,6 +983,9 @@ struct brw_context
 
   /* For the initial pushdown, keep the list of vbo inputs. */
   struct vbo_inputs draw_arrays;
+
+  /* High bits of the last seen vertex buffer address (for workarounds). */
+  uint16_t last_bo_high_bits[33];
} vb;
 
struct {
@@ -1003,6 +1006,9 @@ struct brw_context
* referencing the same index buffer.
*/
   unsigned int start_vertex_offset;
+
+  /* High bits of the last seen index buffer address (for workarounds). */
+  uint16_t last_bo_high_bits;
} ib;
 
/* Active vertex program:
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index b1867c1a1cc..e517b91de93 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -480,6 +480,64 @@ upload_format_size(uint32_t upload_format)
}
 }
 
+static UNUSED uint16_t
+pinned_bo_high_bits(struct brw_bo *bo)
+{
+   return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
+}
+
+/* The VF cache designers apparently cut corners, and made the cache
+ * only consider the bottom 32 bits of memory addresses.  If you happen
+ * to have two vertex buffers which get placed exactly 4 GiB apart and
+ * use them in back-to-back draw calls, you can get collisions.
+ *
+ * In the soft-pin world, we'd like to assign addresses up front, and never
+ * move buffers.  So, we need to do a VF cache invalidate if the buffer for
+ * a particular VB slot has different [48:32] address bits than the last one.
+ *
+ * In the relocation world, we have no idea what the addresses will be, so
+ * we can't apply this workaround.  Instead, we tell the kernel to move it
+ * to the low 4GB regardless.
+ */
+static void
+vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+   bool need_invalidate = true;
+   unsigned i;
+
+   for (i = 0; i < brw->vb.nr_buffers; i++) {
+  uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo);
+
+  if (high_bits != brw->vb.last_bo_high_bits[i]) {
+ need_invalidate = true;
+ brw->vb.last_bo_high_bits[i] = high_bits;
+  }
+   }
+
+   /* Don't bother with draw parameter buffers - those are generated by
+* the driver so we can select a consistent memory zone.
+*/
+
+   if (need_invalidate) {
+  brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+   }
+#endif
+}
+
+static void
+vf_invalidate_for_ib_48bit_transition(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+   uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo);
+
+   if (high_bits != brw->ib.last_bo_high_bits) {
+  brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+  brw->ib.last_bo_high_bits = high_bits;
+   }
+#endif
+}
+
 static void
 genX(emit_vertices)(struct brw_context *brw)
 {
@@ -595,6 +653,8 @@ genX(emit_vertices)(struct brw_context *brw)
const unsigned nr_buffers = brw->vb.nr_buffers +
   uses_draw_params + uses_derived_draw_params;
 
+   vf_invalidate_for_vb_48bit_transitions(brw);
+
if (nr_buffers) {
   assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
 
@@ -890,6 +950,8 @@ genX(emit_index_buffer)(struct brw_context *brw)
if (index_buffer == NULL)
   return;
 
+   vf_invalidate_for_ib_48bit_transition(brw);
+
brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
 #if GEN_GEN < 8 && !GEN_IS_HASWELL
   ib.CutIndexEnable = brw->prim_restart.enable_cut_index;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH 01/10] i965: Set initial kflags on BO creation.

2018-05-03 Thread Kenneth Graunke
This simplifies kflag initialization, by creating a bufmgr-wide setting
for initial kflags, and just applying it whenever we create a new BO.

This also properly allows 48-bit addresses for imported BOs (via prime
or flink), which I had missed in my earlier 48-bit support series.

This will be useful when adding softpin support, as we'll want to add
EXEC_OBJECT_PINNED to initial_kflags as well.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 7cb1f03cf07..66f30a1637f 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -119,7 +119,8 @@ struct brw_bufmgr {
bool has_llc:1;
bool has_mmap_wc:1;
bool bo_reuse:1;
-   bool supports_48b_addresses:1;
+
+   uint64_t initial_kflags;
 };
 
 static int bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
@@ -407,8 +408,7 @@ retry:
bo->reusable = true;
bo->cache_coherent = bufmgr->has_llc;
bo->index = -1;
-   if (bufmgr->supports_48b_addresses)
-  bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+   bo->kflags = bufmgr->initial_kflags;
 
mtx_unlock(>lock);
 
@@ -537,6 +537,7 @@ brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
bo->global_name = handle;
bo->reusable = false;
bo->external = true;
+   bo->kflags = bufmgr->initial_kflags;
 
_mesa_hash_table_insert(bufmgr->handle_table, >gem_handle, bo);
_mesa_hash_table_insert(bufmgr->name_table, >global_name, bo);
@@ -641,7 +642,6 @@ bo_unreference_final(struct brw_bo *bo, time_t time)
   bo->free_time = time;
 
   bo->name = NULL;
-  bo->kflags = 0;
 
   list_addtail(>head, >head);
} else {
@@ -1157,6 +1157,7 @@ brw_bo_gem_create_from_prime_internal(struct brw_bufmgr 
*bufmgr, int prime_fd,
bo->name = "prime";
bo->reusable = false;
bo->external = true;
+   bo->kflags = bufmgr->initial_kflags;
 
if (tiling_mode < 0) {
   struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
@@ -1438,8 +1439,12 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
 
bufmgr->has_llc = devinfo->has_llc;
bufmgr->has_mmap_wc = gem_param(fd, I915_PARAM_MMAP_VERSION) > 0;
-   bufmgr->supports_48b_addresses = devinfo->gen >= 8 &&
-  gtt_size > (4ULL << 30 /* GiB */);
+
+   const uint64_t _4GB = 4ull << 30;
+
+   if (devinfo->gen >= 8 && gtt_size > _4GB) {
+  bufmgr->initial_kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+   }
 
init_cache_buckets(bufmgr);
 
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/10] i965: Softpin all buffers and never use relocations.

2018-05-03 Thread Kenneth Graunke
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

This enables it for Broadwell (with a 64-bit kernel) and Skylake+ (with
any kernel).  Unfortunately, it doesn't enable it for Cherryview as that
has a 32-bit GTT.  We could switch that over as well, but we'd have to
have a single memory zone, which is kind of a special case...

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 07c0d2f7633..4fd95e1d78c 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -1731,7 +1731,7 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
if (devinfo->gen >= 8 && gtt_size > _4GB) {
   bufmgr->initial_kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 
-  if (false && gem_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > 0) {
+  if (gem_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > 0) {
  bufmgr->initial_kflags |= EXEC_OBJECT_PINNED;
 
  util_vma_heap_init(>vma_allocator[BRW_MEMZONE_LOW_4G],
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/10] i965: Require softpin support for Cannonlake and later.

2018-05-03 Thread Kenneth Graunke
This isn't strictly necessary, but anyone running Cannonlake will
already have Kernel 4.5 or later, so there's no reason to support
the relocation model on Gen10+.

This will let us avoid dealing with them for new features.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 4fd95e1d78c..9a059f38aaa 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -1738,6 +1738,10 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
 4096, _4GB);
  util_vma_heap_init(>vma_allocator[BRW_MEMZONE_OTHER],
 1 * _4GB, gtt_size - 1 * _4GB);
+  } else if (devinfo->gen >= 10) {
+ fprintf(stderr, "i965 requires softpin (Kernel 4.5) on Gen10+.");
+ free(bufmgr);
+ return NULL;
   }
}
 
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/10] i965: Dump validation list on INTEL_DEBUG=bat, submit.

2018-05-03 Thread Kenneth Graunke
This is really useful when debugging any sort of buffer management
issues, so just printing it during INTEL_DEBUG=bat,submit seems
reasonable.  With bat, we're already spamming so much output that
it doesn't really hurt.  With submit, it's still easy to grep for
the older information, and the new information is nice too.
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index bac6e6dae85..8c5fd50123a 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -55,7 +55,7 @@
 static void
 intel_batchbuffer_reset(struct brw_context *brw);
 
-UNUSED static void
+static void
 dump_validation_list(struct intel_batchbuffer *batch)
 {
fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);
@@ -880,6 +880,8 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,
   (float) brw->batch.aperture_space / (1024 * 1024),
   brw->batch.batch_relocs.reloc_count,
   brw->batch.state_relocs.reloc_count);
+
+  dump_validation_list(>batch);
}
 
ret = submit_batch(brw, in_fence_fd, out_fence_fd);
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] util: Add a virtual memory allocator

2018-05-03 Thread Kenneth Graunke
From: Jason Ekstrand 

This is simple linear-walk first-fit allocator roughly based on the
allocator in the radeon winsys code.  This allocator has two primary
functional differences:

 1) It cleanly returns 0 on allocation failure

 2) It allocates addresses top-down instead of bottom-up.

The second one is needed for Intel because high addresses (with bit 47
set) need to be canonicalized in order to work properly.  If we allocate
bottom-up, then high addresses will be very rare (if they ever happen).
We'd rather always have high addresses so that the canonicalization code
gets better testing.
---
 src/util/Makefile.sources |   4 +-
 src/util/meson.build  |   2 +
 src/util/vma.c| 232 ++
 src/util/vma.h|  53 +
 4 files changed, 290 insertions(+), 1 deletion(-)
 create mode 100644 src/util/vma.c
 create mode 100644 src/util/vma.h

This is the exact same patch as Scott recently sent:
https://patchwork.freedesktop.org/patch/219951/

It may make more sense to review it on that existing thread.  I mostly
resent it here to note that it's required for this series.

diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index 104ecae8ed3..534520ce763 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -56,7 +56,9 @@ MESA_UTIL_FILES := \
u_string.h \
u_thread.h \
u_vector.c \
-   u_vector.h
+   u_vector.h \
+   vma.c \
+   vma.h
 
 MESA_UTIL_GENERATED_FILES = \
format_srgb.c
diff --git a/src/util/meson.build b/src/util/meson.build
index eece1cefef6..14660e0fa0c 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -81,6 +81,8 @@ files_mesa_util = files(
   'u_thread.h',
   'u_vector.c',
   'u_vector.h',
+  'vma.c',
+  'vma.h',
 )
 
 install_data('drirc', install_dir : get_option('sysconfdir'))
diff --git a/src/util/vma.c b/src/util/vma.c
new file mode 100644
index 000..3d61f6969ed
--- /dev/null
+++ b/src/util/vma.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+
+#include "util/u_math.h"
+#include "util/vma.h"
+
+struct util_vma_hole {
+   struct list_head link;
+   uint64_t offset;
+   uint64_t size;
+};
+
+#define util_vma_foreach_hole(_hole, _heap) \
+   list_for_each_entry(struct util_vma_hole, _hole, &(_heap)->holes, link)
+
+#define util_vma_foreach_hole_safe(_hole, _heap) \
+   list_for_each_entry_safe(struct util_vma_hole, _hole, &(_heap)->holes, link)
+
+void
+util_vma_heap_init(struct util_vma_heap *heap,
+   uint64_t start, uint64_t size)
+{
+   list_inithead(>holes);
+   util_vma_heap_free(heap, start, size);
+}
+
+void
+util_vma_heap_finish(struct util_vma_heap *heap)
+{
+   util_vma_foreach_hole_safe(hole, heap)
+  free(hole);
+}
+
+static void
+util_vma_heap_validate(struct util_vma_heap *heap)
+{
+   uint64_t prev_offset = 0;
+   util_vma_foreach_hole(hole, heap) {
+  assert(hole->offset > 0);
+  assert(hole->size > 0);
+
+  if (>link == heap->holes.next) {
+ /* This must be the top-most hole.  Assert that, if it overflows, it
+  * overflows to 0, i.e. 2^64.
+  */
+ assert(hole->size + hole->offset == 0 ||
+hole->size + hole->offset > hole->offset);
+  } else {
+ /* This is not the top-most hole so it must not overflow and, in
+  * fact, must be strictly lower than the top-most hole.  If
+  * hole->size + hole->offset == prev_offset, then we failed to join
+  * holes during a util_vma_heap_free.
+  */
+ assert(hole->size + hole->offset > hole->offset &&
+hole->size + hole->offset < prev_offset);
+  }
+  prev_offset = hole->offset;
+   }
+}
+
+uint64_t
+util_vma_heap_alloc(struct util_vma_heap *heap,
+

[Mesa-dev] [PATCH 06/10] i965: Add virtual memory allocator infrastructure to brw_bufmgr.

2018-05-03 Thread Kenneth Graunke
This introduces a new fast virtual memory allocator integrated with our
BO cache bucketing.  For larger objects, it falls back to the simple
free-list allocator (util_vma).

This puts the allocators in place but doesn't enable softpin yet.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 291 -
 src/mesa/drivers/dri/i965/brw_bufmgr.h |   2 +
 2 files changed, 292 insertions(+), 1 deletion(-)

I'm happy to write more comments here.  It's a pretty simple system, but
not necessarily the most intuitive.  Feel free to ask questions.

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 66828f319be..07c0d2f7633 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -60,6 +60,8 @@
 #include "util/macros.h"
 #include "util/hash_table.h"
 #include "util/list.h"
+#include "util/u_dynarray.h"
+#include "util/vma.h"
 #include "brw_bufmgr.h"
 #include "brw_context.h"
 #include "string.h"
@@ -98,9 +100,40 @@ atomic_add_unless(int *v, int add, int unless)
return c == unless;
 }
 
+/**
+ * i965 fixed-size bucketing VMA allocator.
+ *
+ * The BO cache maintains "cache buckets" for buffers of various sizes.
+ * All buffers in a given bucket are identically sized - when allocating,
+ * we always round up to the bucket size.  This means that virtually all
+ * allocations are fixed-size; only buffers which are too large to fit in
+ * a bucket can be variably-sized.
+ *
+ * We create an allocator for each bucket.  Each contains a free-list, where
+ * each node contains a  pair.  Each bit
+ * represents a bucket-sized block of memory.  (At the first level, each
+ * bit corresponds to a page.  For the second bucket, bits correspond to
+ * two pages, and so on.)  1 means a block is free, and 0 means it's in-use.
+ *
+ * This makes allocations cheap - any bit of any node will do.  We can pick
+ * the head of the list and use ffs() to find a free block.  If there are
+ * none, we allocate 64 blocks from a larger allocator - either a bigger
+ * bucketing allocator, or a fallback top-level allocator for large objects.
+ */
+struct vma_bucket_node {
+   uint64_t start_address;
+   uint64_t bitmap;
+};
+
 struct bo_cache_bucket {
+   /** List of cached BOs. */
struct list_head head;
+
+   /** Size of this bucket, in bytes. */
uint64_t size;
+
+   /** List of vma_bucket_nodes. */
+   struct util_dynarray vma_list[BRW_MEMZONE_COUNT];
 };
 
 struct brw_bufmgr {
@@ -116,6 +149,8 @@ struct brw_bufmgr {
struct hash_table *name_table;
struct hash_table *handle_table;
 
+   struct util_vma_heap vma_allocator[BRW_MEMZONE_COUNT];
+
bool has_llc:1;
bool has_mmap_wc:1;
bool bo_reuse:1;
@@ -128,6 +163,10 @@ static int bo_set_tiling_internal(struct brw_bo *bo, 
uint32_t tiling_mode,
 
 static void bo_free(struct brw_bo *bo);
 
+static uint64_t __vma_alloc(struct brw_bufmgr *bufmgr,
+enum brw_memory_zone memzone,
+uint64_t size, uint64_t alignment);
+
 static uint32_t
 key_hash_uint(const void *key)
 {
@@ -222,6 +261,198 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
   >cache_bucket[index] : NULL;
 }
 
+static enum brw_memory_zone
+memzone_for_address(uint64_t address)
+{
+   const uint64_t _4GB = 1ull << 32;
+
+   if (address >= _4GB)
+  return BRW_MEMZONE_OTHER;
+
+   return BRW_MEMZONE_LOW_4G;
+}
+
+static uint64_t
+bucket_vma_alloc(struct brw_bufmgr *bufmgr,
+ struct bo_cache_bucket *bucket,
+ enum brw_memory_zone memzone)
+{
+   struct util_dynarray *vma_list = >vma_list[memzone];
+   struct vma_bucket_node *node;
+
+   if (vma_list->size == 0) {
+  /* This bucket allocator is out of space - allocate a new block of
+   * memory for 64 blocks from a larger allocator (either a larger
+   * bucket or util_vma).
+   *
+   * We align the address to the node size (64 blocks) so that
+   * bucket_vma_free can easily compute the starting address of this
+   * block by rounding any address we return down to the node size.
+   *
+   * Set the first bit used, and return the start address.
+   */
+  uint64_t node_size = 64ull * bucket->size;
+  node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
+  node->start_address = __vma_alloc(bufmgr, memzone, node_size, node_size);
+  node->bitmap = ~1ull;
+  return node->start_address;
+   }
+
+   /* Pick any bit from any node - they're all the right size and free. */
+   node = util_dynarray_top_ptr(vma_list, struct vma_bucket_node);
+   int bit = ffsll(node->bitmap) - 1;
+   assert(bit >= 0 && bit <= 63);
+
+   /* Reserve the memory by clearing the bit. */
+   assert((node->bitmap & (1ull << bit)) != 0ull);
+   node->bitmap &= ~(1ull << bit);
+
+   /* If this node is now completely full, remove it from the free list. */
+   if (node->bitmap == 0ull) {
+  (void) 

[Mesa-dev] [PATCH 03/10] util: Make vma.c support non-power-of-two alignments.

2018-05-03 Thread Kenneth Graunke
I want to use this in a bucketing allocator for i965.
---
 src/util/vma.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/util/vma.c b/src/util/vma.c
index 3d61f6969ed..d6ee05988ef 100644
--- a/src/util/vma.c
+++ b/src/util/vma.c
@@ -88,7 +88,6 @@ util_vma_heap_alloc(struct util_vma_heap *heap,
assert(size > 0);
 
assert(alignment > 0);
-   assert(util_is_power_of_two_nonzero(alignment));
 
util_vma_heap_validate(heap);
 
@@ -107,7 +106,7 @@ util_vma_heap_alloc(struct util_vma_heap *heap,
   /* Align the offset.  We align down and not up because we are allocating
* from the top of the hole and not the bottom.
*/
-  offset &= ~(alignment - 1);
+  offset = (offset / alignment) * alignment;
 
   if (offset < hole->offset)
  continue;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: Fix include for createInstructionCombiningPass with llvm-7.0.

2018-05-03 Thread Kyriazis, George
It would be nice to avoid the extra #if LLVM_VERSION_MAJOR and include 
InstCombine.h into the same #if below, as follows:

--- a/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
@@ -69,6 +69,7 @@ using PassManager = llvm::legacy::PassManager;
#include "llvm/Transforms/Scalar.h"
#if LLVM_VERSION_MAJOR >= 7
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
#endif
#include "llvm/Support/Host.h"
#include "llvm/Support/DynamicLibrary.h”

With the above change

Reviewed-By: George Kyriazis 
>

Thanks

George

On Apr 29, 2018, at 1:40 AM, Vinson Lee 
> wrote:

Fix build error after llvm-7.0.0svn r330669 ("InstCombine: Fix layering
by not including Scalar.h in InstCombine").

 CXX  rasterizer/jitter/libmesaswr_la-blend_jit.lo
rasterizer/jitter/blend_jit.cpp:816:20: error: use of undeclared identifier 
'createInstructionCombiningPass'; did you mean 
'createInstructionSimplifierPass'?
   passes.add(createInstructionCombiningPass());
  ^~
  createInstructionSimplifierPass

Signed-off-by: Vinson Lee >
---
src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp | 3 +++
1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp 
b/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
index 216938fa7b07..6e4d24dde8fa 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
@@ -66,6 +66,9 @@ using PassManager = llvm::legacy::PassManager;
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Transforms/IPO.h"
+#if LLVM_VERSION_MAJOR >= 7
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#endif
#include "llvm/Transforms/Scalar.h"
#if LLVM_VERSION_MAJOR >= 7
#include "llvm/Transforms/Utils.h"
--
2.17.0


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/22] i965: Setup glsl uniforms by index rather than name matching

2018-05-03 Thread Timothy Arceri

On 03/05/18 22:50, Neil Roberts wrote:

Thanks for the feedback.

I’ve implemented your suggestion as the top two patches on a branch
here:

https://github.com/Igalia/mesa/tree/nroberts/count-uniform-storage

It also fixes a slight bug that it wasn’t passing down the
uniform_storage_locs argument into the recursion.

However I think my preference would be for keeping this as a separate
function because it seems a little confusing to make the function count two
different things depending on a bool argument. Perhaps for the sake of
maintainability it would be good to move it to glsl_types though.


Well we do a similar thing for count_attribute_slots() so I don't think 
its to big of a deal.




If there is a strong preference to merge it into one function then of
course I don’t mind.


As long as we document the difference between the two functions having 
two would probably be ok also, although my preference is to just have 
the one. Up to you others might disagree with me.




Regards,
- Neil

Timothy Arceri  writes:


On 18/04/18 00:36, Alejandro Piñeiro wrote:

From: Neil Roberts 

Previously when setting up a uniform it would try to walk the uniform
storage slots and find one that matches the name of the given
variable. However, each variable already has a location which is an
index into the UniformStorage array so we can just directly jump to
the right slot. Some of the variables take up more than one slot so we
still need to calculate how many it uses.

The main reason to do this is to support ARB_gl_spirv because in that
case the uniforms don’t have names so the previous approach won’t
work.


This is a nice improvement away :)

However it might be nicer to just update the existing
glsl_type::uniform_locations() helper
rather than create the custom count_uniform_storage_slots() helper.

e.g

unsigned
glsl_type::uniform_locations(bool uniform_storage_locs) const
{
 unsigned size = 0;

 switch (this->base_type) {
 case GLSL_TYPE_UINT:
 case GLSL_TYPE_INT:
 case GLSL_TYPE_FLOAT:
 case GLSL_TYPE_FLOAT16:
 case GLSL_TYPE_DOUBLE:
 case GLSL_TYPE_UINT16:
 case GLSL_TYPE_UINT8:
 case GLSL_TYPE_INT16:
 case GLSL_TYPE_INT8:
 case GLSL_TYPE_UINT64:
 case GLSL_TYPE_INT64:
 case GLSL_TYPE_BOOL:
 case GLSL_TYPE_SAMPLER:
 case GLSL_TYPE_IMAGE:
 case GLSL_TYPE_SUBROUTINE:
return 1;

 case GLSL_TYPE_STRUCT:
 case GLSL_TYPE_INTERFACE:
for (unsigned i = 0; i < this->length; i++)
   size += this->fields.structure[i].type->uniform_locations();
return size;
 case GLSL_TYPE_ARRAY:
if (!uniform_storage_locs ||
this->fields.array->is_array() ||
this->fields.array->is_interface() ||
this->fields.array->is_record()) {
/* For uniform locations passed to the user via the API we
 * count all array elements.
 */
   return this->length * this->fields.array->uniform_locations();
} else {
   /* For uniform storage the innermost array only uses a
* single slot.
*/
   return 1;
}
 default:
return 0;
 }
}

If you agree this patch is:

Reviewed-by: Timothy Arceri 


---
   src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 50 
+++---
   1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 
b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index 62b2951432a..cb5e07f74ba 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -118,35 +118,59 @@ brw_setup_image_uniform_values(gl_shader_stage stage,
  }
   }
   
+static unsigned

+count_uniform_storage_slots(const struct glsl_type *type)
+{
+   /* gl_uniform_storage can cope with one level of array, so if the
+* type is a composite type or an array where each element occupies
+* more than one slot than we need to recursively process it.
+*/
+   if (glsl_type_is_struct(type)) {
+  unsigned location_count = 0;
+
+  for (unsigned i = 0; i < glsl_get_length(type); i++) {
+ const struct glsl_type *field_type = glsl_get_struct_field(type, i);
+
+ location_count += count_uniform_storage_slots(field_type);
+  }
+
+  return location_count;
+   }
+
+   if (glsl_type_is_array(type)) {
+  const struct glsl_type *element_type = glsl_get_array_element(type);
+
+  if (glsl_type_is_array(element_type) ||
+  glsl_type_is_struct(element_type)) {
+ unsigned element_count = count_uniform_storage_slots(element_type);
+ return element_count * glsl_get_length(type);
+  }
+   }
+
+   return 1;
+}
+
   static void
   brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
  const struct gl_program *prog,
  

[Mesa-dev] [Bug 106394] Black Mesa cannot compile shaders because of missing GL_EXT_gpu_shader4

2018-05-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106394

--- Comment #2 from Timothy Arceri  ---
It looks like they are now using a compat profile which is unfortunate.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 00/18] anv: add shaderInt16 support

2018-05-03 Thread Mark Janes
Clayton Craft  writes:

> Quoting Iago Toral Quiroga (2018-04-30 07:18:08)
>> This version addresses the feedback received to v1, which includes moving the
>> bit-size lowering pass from intel to core NIR (patch 8) and a separate patch
>> to add Intel's specific configuration for int16 (patch 9), and then it also
>> adds a few things that were missing in the first version, namely, a fix for
>> 16-bit comparisons to emit 32-bit booleans (patch 10 -a patch to optimize the
>> resulting code will come later-) and 16-bit pack/unpack which is needed for
>> 16-bit bitcasts (patches 11-15).
>> 
> Since this patch series was merged, we are seeing a number of failures in CI 
> on
> BSW, GLK, and BXT platforms:
>
> dEQP-VK.spirv_assembly.instruction.compute.sconvert.int16_to_int64
> dEQP-VK.spirv_assembly.instruction.compute.uconvert.uint16_to_uint64
> dEQP-VK.spirv_assembly.instruction.compute.sconvert.int16_to_uint64
>

I've reverted this patch, due to:
 https://bugs.freedesktop.org/show_bug.cgi?id=106393

We can't execute CI tests due to that bug, so I couldn't leave the patch
in place.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/9] anv: soft pin state pools

2018-05-03 Thread Jason Ekstrand
I commented on this in the office, but I think this whole thing would be
cleaner if we just clearly documented address ranges in anv_private.h with
a good comment.  Something like

#define LOW_HEAP_BASE_ADDRESS 4096
#define LOW_HEAP_SIZE ((3ull << 30) - 4096)
#define DYNAMIC_STATE_POOL_ADDRESS (3ull << 30)
#define BINDING_TABLE_POOL_ADDRESS (4ull << 30)
#define SURFACE_STATE_POOL_ADDRESS (5ull << 30)

Maybe we want it in hex?  I'm not sure.  In any case, I think having the
layout explicit is better.

On Wed, May 2, 2018 at 9:01 AM, Scott D Phillips  wrote:

> The state_pools reserve virtual address space of the full
> BLOCK_POOL_MEMFD_SIZE, but maintain the current behavior of
> growing from the middle.
> ---
>  src/intel/vulkan/anv_allocator.c | 25 +
>  src/intel/vulkan/anv_device.c| 13 +
>  src/intel/vulkan/anv_private.h   |  2 ++
>  3 files changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_allocator.c
> b/src/intel/vulkan/anv_allocator.c
> index 642e1618c10..fa4e7d74ac7 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -250,6 +250,27 @@ anv_block_pool_init(struct anv_block_pool *pool,
>
> pool->device = device;
> pool->bo_flags = bo_flags;
> +
> +   if (bo_flags & EXEC_OBJECT_PINNED) {
> +  pool->offset = 0;
> +
> +  pthread_mutex_lock(>vma_mutex);
> +
> +  if (bo_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)
> + pool->offset = util_vma_heap_alloc(>vma_hi,
> +BLOCK_POOL_MEMFD_SIZE, 4096);
> +
> +  if (!pool->offset)
> + pool->offset = util_vma_heap_alloc(>vma_lo,
> +BLOCK_POOL_MEMFD_SIZE, 4096);
> +
> +  pthread_mutex_unlock(>vma_mutex);
> +
> +  if (!pool->offset)
> + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
> +
> +  pool->offset = canonical_address(pool->offset);
> +   }
> anv_bo_init(>bo, 0, 0);
>
> pool->fd = memfd_create("block pool", MFD_CLOEXEC);
> @@ -402,6 +423,10 @@ anv_block_pool_expand_range(struct anv_block_pool
> *pool,
>  * hard work for us.
>  */
> anv_bo_init(>bo, gem_handle, size);
> +   if (pool->bo_flags & EXEC_OBJECT_PINNED) {
> +  pool->bo.offset = pool->offset + BLOCK_POOL_MEMFD_CENTER -
> + center_bo_offset;
> +   }
> pool->bo.flags = pool->bo_flags;
> pool->bo.map = map;
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index d3d9c779d62..2837d2f83ca 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1613,12 +1613,17 @@ VkResult anv_CreateDevice(
> if (result != VK_SUCCESS)
>goto fail_batch_bo_pool;
>
> -   /* For the state pools we explicitly disable 48bit. */
> -   bo_flags = (physical_device->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
> -  (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE :
> 0);
> +   if (physical_device->has_exec_softpin)
> +  bo_flags |= EXEC_OBJECT_PINNED;
> +   else
> +  bo_flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>
> +   /* dynamic_state_pool needs to stay in the same 4GiB as index and
> +* vertex buffers. For rationale, see the comment in
> +* anv_physical_device_init_heaps.
> +*/
> result = anv_state_pool_init(>dynamic_state_pool, device,
> 16384,
> -bo_flags);
> +bo_flags & ~EXEC_OBJECT_SUPPORTS_48B_ADDR
> ESS);
> if (result != VK_SUCCESS)
>goto fail_bo_cache;
>
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private
> .h
> index 708c3a540d3..23527eebaab 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -582,6 +582,8 @@ struct anv_block_pool {
>
> struct anv_bo bo;
>
> +   uint64_t offset;
>

This might be better named "start_address".  Also, we should have a comment
saying what it means. :-)


> +
> /* The offset from the start of the bo to the "center" of the block
>  * pool.  Pointers to allocated blocks are given by
>  * bo.map + center_bo_offset + offsets.
> --
> 2.14.3
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106394] Black Mesa cannot compile shaders because of missing GL_EXT_gpu_shader4

2018-05-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106394

Ian Romanick  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |WONTFIX

--- Comment #1 from Ian Romanick  ---
The application should use GLSL 1.30.  Mesa is not going to support
GL_EXT_gpu_shader4.  GLSL 1.30 has been around for a decade.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106394] Black Mesa cannot compile shaders because of missing GL_EXT_gpu_shader4

2018-05-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106394

Bug ID: 106394
   Summary: Black Mesa cannot compile shaders because of missing
GL_EXT_gpu_shader4
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: b...@besd.de
QA Contact: mesa-dev@lists.freedesktop.org

There was an update recently (I think 30.4.) that changed the render engine
they use, since then:

Various messages like these from the game:
ShaderAPIDX8::CreateD3DTexture: Invalid color format!
Compile of "vertexlit_and_unlit_generic_ps20b" Failed:
Compile of "vertexlit_and_unlit_generic_vs20" Failed:
IDirect3DDevice9::CreatePixelShader: shaderapi's centroid mask (0x)
differs from mask derived from shader name (0x000C) for shader ps-file
teeth_flashlight_ps20b ps-index 0 ps-combo 0

I dumped the shaders and did some runs

shader-db
ERROR: shaders/blackmesa/5073.shader_test failed to compile:
0:2(10): error: GLSL 1.50 is not supported. Supported versions are: 1.10, 1.20,
1.30, 1.40, 1.00 ES, 3.00 ES, 3.10 ES, and 3.20 ES

shader-db with MESA_GLSL_VERSION_OVERRIDE=150
ERROR: shaders/blackmesa/5073.shader_test failed to compile:
0:2(10): error: GLSL 1.50 is not supported. Supported versions are: 1.10, 1.20,
1.30, 1.40, 1.00 ES, 3.00 ES, 3.10 ES, and 3.20 ES

shader-db with MESA_GL_VERSION_OVERRIDE=4.5
ERROR: shaders/blackmesa/5421.shader_test failed to compile:
0:4(12): error: extension `GL_EXT_gpu_shader4' unsupported in vertex shader

if I try to advertise GL_EXT_gpu_shader4 in mesa/main/version.c (works for
gpu_shader5 and others) then the OpenGL core version becomes 2.0 and the
problem persists (also because I cant build 32-bit drivers on ubuntu)

   const bool ver_2_1 = (ver_2_0 &&
 extensions->EXT_pixel_buffer_object &&
 extensions->ARB_map_buffer_range &&
 extensions->ARB_draw_instanced &&
 extensions->ARB_shader_bit_encoding &&
 extensions->ARB_gpu_shader5 &&
// extensions->EXT_gpu_shader4 &&
 extensions->EXT_texture_sRGB);

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #7615 completed

2018-05-03 Thread AppVeyor


Build mesa 7615 completed



Commit 589622a2fe by Vinson Lee on 5/1/2018 6:57 AM:

swr/rast: Fix WriteBitcodeToFile usage with llvm-7.0.\n\nFix build error after llvm-7.0svn r325155 ("Pass a reference to a module\nto the bitcode writer.").\n\n  CXX  rasterizer/jitter/libmesaswr_la-JitManager.lo\nrasterizer/jitter/JitManager.cpp:548:30: error: reference to type 'const llvm::Module' could not bind to an lvalue of type 'const llvm::Module *'\nllvm::WriteBitcodeToFile(M, bitcodeStream);\n ^\n\nSuggested-by: George Kyriazis \nSigned-off-by: Vinson Lee \nReviewed-By: George Kyriazis 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeon/vcn: fix mpeg4 msg buffer settings

2018-05-03 Thread boyuan.zhang
From: Boyuan Zhang 

Previous bit-fields assignments are incorrect and will result certain mpeg4
decode failed due to wrong flag values. This patch fixes these assignments.

Cc: 18.0 18.1 

Signed-off-by: Boyuan Zhang 
Reviewed-by: Leo Liu 
---
 src/gallium/drivers/radeon/radeon_vcn_dec.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c 
b/src/gallium/drivers/radeon/radeon_vcn_dec.c
index f83e9e5..4bc922d 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c
@@ -554,15 +554,15 @@ static rvcn_dec_message_mpeg4_asp_vld_t 
get_mpeg4_msg(struct radeon_decoder *dec
 
result.vop_time_increment_resolution = 
pic->vop_time_increment_resolution;
 
-   result.short_video_header |= pic->short_video_header << 0;
-   result.interlaced |= pic->interlaced << 2;
-result.load_intra_quant_mat |= 1 << 3;
-   result.load_nonintra_quant_mat |= 1 << 4;
-   result.quarter_sample |= pic->quarter_sample << 5;
-   result.complexity_estimation_disable |= 1 << 6;
-   result.resync_marker_disable |= pic->resync_marker_disable << 7;
-   result.newpred_enable |= 0 << 10; //
-   result.reduced_resolution_vop_enable |= 0 << 11;
+   result.short_video_header = pic->short_video_header;
+   result.interlaced = pic->interlaced;
+   result.load_intra_quant_mat = 1;
+   result.load_nonintra_quant_mat = 1;
+   result.quarter_sample = pic->quarter_sample;
+   result.complexity_estimation_disable = 1;
+   result.resync_marker_disable = pic->resync_marker_disable;
+   result.newpred_enable = 0;
+   result.reduced_resolution_vop_enable = 0;
 
result.quant_type = pic->quant_type;
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/9] anv: Add vma_heap allocators in anv_device

2018-05-03 Thread Jason Ekstrand
On Wed, May 2, 2018 at 9:01 AM, Scott D Phillips  wrote:

> These will be used to assign virtual addresses to soft pinned
> buffers in a later patch.
> ---
>  src/intel/vulkan/anv_device.c  | 75 ++
> 
>  src/intel/vulkan/anv_private.h | 11 +++
>  2 files changed, 86 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index c0cec175826..d3d9c779d62 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -369,6 +369,8 @@ anv_physical_device_init(struct anv_physical_device
> *device,
> device->has_exec_async = anv_gem_get_param(fd,
> I915_PARAM_HAS_EXEC_ASYNC);
> device->has_exec_capture = anv_gem_get_param(fd,
> I915_PARAM_HAS_EXEC_CAPTURE);
> device->has_exec_fence = anv_gem_get_param(fd,
> I915_PARAM_HAS_EXEC_FENCE);
> +   device->has_exec_softpin = anv_gem_get_param(fd,
> I915_PARAM_HAS_EXEC_SOFTPIN)
> +  && device->supports_48bit_addresses;
>

I'd rather we call this something like use_softpin since it isn't just a
"does the kernel have this feature" flag.


> device->has_syncobj = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_
> ARRAY);
> device->has_syncobj_wait = device->has_syncobj &&
>anv_gem_supports_syncobj_wait(fd);
> @@ -1527,6 +1529,26 @@ VkResult anv_CreateDevice(
>goto fail_fd;
> }
>
> +   if (physical_device->has_exec_softpin) {
> +  if (pthread_mutex_init(>vma_mutex, NULL) != 0) {
> + result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
> + goto fail_fd;
> +  }
> +
> +  /* keep the page with address zero out of the allocator */
> +  util_vma_heap_init(>vma_lo, 4096, (1ull << 32) - 2 * 4096);
>

Why are you subtracting 2 * 4096?


> +  device->vma_lo_available =
> + physical_device->memory.heaps[physical_device->memory.heap_count
> - 1].size;
> +
> +  /* Leave the last 4GiB out of the high vma range, so that no state
> base
> +   * address + size can overflow 48 bits.
>

Might be good to have a more detailed comment here or at least reference
the comment in anv_allocator.c that deals with the workaround.


> +   */
> +  util_vma_heap_init(>vma_hi, (1ull << 32) + 4096,
> + (1ull << 48) - 2 * (1ull << 32) - 2 * 4096);
>

Why are you not starting at (1ull << 32)?

If neither of those have a good reason, then we should probably only drop
the bottom and top pages in the entire 48-bit range.


> +  device->vma_hi_available = physical_device->memory.heap_count == 1
> ? 0 :
> + physical_device->memory.heaps[0].size;
> +   }
> +
> /* As per spec, the driver implementation may deny requests to acquire
>  * a priority above the default priority (MEDIUM) if the caller does
> not
>  * have sufficient privileges. In this scenario
> VK_ERROR_NOT_PERMITTED_EXT
> @@ -1887,6 +1909,59 @@ VkResult anv_DeviceWaitIdle(
> return anv_device_submit_simple_batch(device, );
>  }
>
> +bool
> +anv_vma_alloc(struct anv_device *device, struct anv_bo *bo)
> +{
> +   if (!(bo->flags & EXEC_OBJECT_PINNED))
> +  return true;
>

When are things not pinned?


> +
> +   pthread_mutex_lock(>vma_mutex);
> +
> +   bo->offset = 0;
> +
> +   if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS &&
>
+   device->vma_hi_available >= bo->size) {
> +  uint64_t addr = util_vma_heap_alloc(>vma_hi, bo->size,
> 4096);
> +  if (addr) {
> + bo->offset = canonical_address(addr);
> + device->vma_hi_available -= bo->size;
> +  }
> +   }
> +
> +   if (bo->offset == 0 && device->vma_lo_available >= bo->size) {
> +  uint64_t addr = util_vma_heap_alloc(>vma_lo, bo->size,
> 4096);
> +  if (addr) {
> + bo->offset = canonical_address(addr);
> + device->vma_lo_available -= bo->size;
> +  }
> +   }
>

I'm not sure how I feel about using EXEC_OBJECT_SUPPORTS_48B_ADDRESS for
this.  I think it certainly works but it's not what I had pictured.


> +
> +   pthread_mutex_unlock(>vma_mutex);
> +
> +   return bo->offset != 0;
> +}
> +
> +void
> +anv_vma_free(struct anv_device *device, struct anv_bo *bo)
> +{
> +   if (!(bo->flags & EXEC_OBJECT_PINNED))
> +  return;
> +
> +   pthread_mutex_lock(>vma_mutex);
> +
> +   if (bo->offset >= 1ull << 32) {
> +  util_vma_heap_free(>vma_hi, bo->offset, bo->size);
> +  device->vma_hi_available += bo->size;
> +   } else {
> +  util_vma_heap_free(>vma_lo, bo->offset, bo->size);
> +  device->vma_lo_available += bo->size;
> +   }
> +
> +   pthread_mutex_unlock(>vma_mutex);
> +
> +   bo->offset = 0;
> +}
> +
>  VkResult
>  anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t
> size)
>  {
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> index 761601d1e37..708c3a540d3 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -49,6 +49,7 @@
>  

Re: [Mesa-dev] [PATCH 02/17] i965/miptree: Zero-initialize CCS_D buffers

2018-05-03 Thread Nanley Chery
On Thu, May 03, 2018 at 12:40:49PM -0700, Jason Ekstrand wrote:
> Good catch. Rb
> 

Thanks!

> On May 3, 2018 12:04:59 Nanley Chery  wrote:
> 
> > Before this patch, the aux_state was actually AUX_INVALID because the BO
> > was never defined. This was fine on single slice miptrees because we
> > would fast-clear the resource right after creation. For multi-slice
> > miptrees on SKL+ however, this results in undefined behavior when
> > accessing a non-base slice. Here's a specific example:
> > 
> > 1) Fast clear level 0
> >   * Undefined CCS_D buffer allocated in "PASS_THROUGH" state.
> >   * Level 0 transitions to the CLEAR state.
> > 2) Render to level 1
> >   * Level 1 may have a 2-bit pattern of 2's.
> >   * Rendering with a 2 in the CCS is undefined.
> > 
> > Cc: 
> > ---
> > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 10 --
> > 1 file changed, 4 insertions(+), 6 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index 377efae32c9..182a896e23a 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -1806,13 +1806,11 @@ intel_miptree_alloc_ccs(struct brw_context *brw,
> > * A CCS value of 0 indicates that the corresponding block is in the
> > * pass-through state which is what we want.
> > *
> > -* For CCS_D, on the other hand, we don't care as we're about to 
> > perform a
> > -* fast-clear operation.  In that case, being hot in caches more useful.
> > +* For CCS_D, do the same thing. On gen9+, this avoids having any 
> > undefined
> > +* bits in the aux buffer.
> > */
> > -   const uint32_t alloc_flags = mt->aux_usage == ISL_AUX_USAGE_CCS_E ?
> > -BO_ALLOC_ZEROED : BO_ALLOC_BUSY;
> > -   mt->aux_buf = intel_alloc_aux_buffer(brw, "ccs-miptree",
> > -_ccs_surf, alloc_flags, mt);
> > +   mt->aux_buf = intel_alloc_aux_buffer(brw, "ccs-miptree", _ccs_surf,
> > +BO_ALLOC_ZEROED, mt);
> >if (!mt->aux_buf) {
> >   free(aux_state);
> >   return false;
> > --
> > 2.16.2
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] anv: remove unused field anv_queue::pool

2018-05-03 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Wed, May 2, 2018 at 9:01 AM, Scott D Phillips  wrote:

> The last use of the field was removed in 2015's ("48a87f4ba06
> anv/queue: Get rid of the serial")
> ---
>  src/intel/vulkan/anv_device.c  | 1 -
>  src/intel/vulkan/anv_private.h | 2 --
>  2 files changed, 3 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 856035b8b91..c0cec175826 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1268,7 +1268,6 @@ anv_queue_init(struct anv_device *device, struct
> anv_queue *queue)
>  {
> queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
> queue->device = device;
> -   queue->pool = >surface_state_pool;
> queue->flags = 0;
>  }
>
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> index d8b34b149e4..d043c77826e 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -838,8 +838,6 @@ struct anv_queue {
>
>  struct anv_device * device;
>
> -struct anv_state_pool * pool;
> -
>  VkDeviceQueueCreateFlagsflags;
>  };
>
> --
> 2.14.3
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/9] util/set: add a set_clear function

2018-05-03 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Wed, May 2, 2018 at 9:01 AM, Scott D Phillips  wrote:

> Clear a set back to the state of having zero entries.
> ---
>  src/util/set.c | 23 +++
>  src/util/set.h |  3 +++
>  2 files changed, 26 insertions(+)
>
> diff --git a/src/util/set.c b/src/util/set.c
> index d71f771807f..2c9b09319ff 100644
> --- a/src/util/set.c
> +++ b/src/util/set.c
> @@ -155,6 +155,29 @@ _mesa_set_destroy(struct set *ht, void
> (*delete_function)(struct set_entry *entr
> ralloc_free(ht);
>  }
>
> +/**
> + * Clears all values from the given set.
> + *
> + * If delete_function is passed, it gets called on each entry present
> before
> + * the set is cleared.
> + */
> +void
> +_mesa_set_clear(struct set *set, void (*delete_function)(struct set_entry
> *entry))
> +{
> +   struct set_entry *entry;
> +
> +   if (!set)
> +  return;
> +
> +   set_foreach (set, entry) {
> +  if (delete_function)
> + delete_function(entry);
> +  entry->key = deleted_key;
> +   }
> +
> +   set->entries = set->deleted_entries = 0;
> +}
> +
>  /**
>   * Finds a set entry with the given key and hash of that key.
>   *
> diff --git a/src/util/set.h b/src/util/set.h
> index 9acd2c28c9f..06e79e15867 100644
> --- a/src/util/set.h
> +++ b/src/util/set.h
> @@ -61,6 +61,9 @@ _mesa_set_create(void *mem_ctx,
>  void
>  _mesa_set_destroy(struct set *set,
>void (*delete_function)(struct set_entry *entry));
> +void
> +_mesa_set_clear(struct set *set,
> +void (*delete_function)(struct set_entry *entry));
>
>  struct set_entry *
>  _mesa_set_add(struct set *set, const void *key);
> --
> 2.14.3
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/17] i965/miptree: Zero-initialize CCS_D buffers

2018-05-03 Thread Jason Ekstrand

Good catch. Rb

On May 3, 2018 12:04:59 Nanley Chery  wrote:


Before this patch, the aux_state was actually AUX_INVALID because the BO
was never defined. This was fine on single slice miptrees because we
would fast-clear the resource right after creation. For multi-slice
miptrees on SKL+ however, this results in undefined behavior when
accessing a non-base slice. Here's a specific example:

1) Fast clear level 0
  * Undefined CCS_D buffer allocated in "PASS_THROUGH" state.
  * Level 0 transitions to the CLEAR state.
2) Render to level 1
  * Level 1 may have a 2-bit pattern of 2's.
  * Rendering with a 2 in the CCS is undefined.

Cc: 
---
src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 10 --
1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c

index 377efae32c9..182a896e23a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1806,13 +1806,11 @@ intel_miptree_alloc_ccs(struct brw_context *brw,
* A CCS value of 0 indicates that the corresponding block is in the
* pass-through state which is what we want.
*
-* For CCS_D, on the other hand, we don't care as we're about to perform a
-* fast-clear operation.  In that case, being hot in caches more useful.
+* For CCS_D, do the same thing. On gen9+, this avoids having any undefined
+* bits in the aux buffer.
*/
-   const uint32_t alloc_flags = mt->aux_usage == ISL_AUX_USAGE_CCS_E ?
-BO_ALLOC_ZEROED : BO_ALLOC_BUSY;
-   mt->aux_buf = intel_alloc_aux_buffer(brw, "ccs-miptree",
-_ccs_surf, alloc_flags, mt);
+   mt->aux_buf = intel_alloc_aux_buffer(brw, "ccs-miptree", _ccs_surf,
+BO_ALLOC_ZEROED, mt);
   if (!mt->aux_buf) {
  free(aux_state);
  return false;
--
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Lowering viewport transformation in NIR

2018-05-03 Thread Rob Clark
On Thu, May 3, 2018 at 3:12 PM, Alyssa Rosenzweig  wrote:
> Hi all,
>
> Certain embedded GPUs do not implement coordinate transformation in
> hardware. Instead, section 12.5 "Coordinate Transformation" of the ES
> 3.2 specification is implemented in the vertex shader itself. Relevant
> examples include Midgard and vc4.
>
> To handle this, a lowering pass is needed to convert gl_Position writes
> to screen space writes. The vc4 driver lowers this in the backend IR;
> however, I don't think the pass needs to be specialised to the backend.
> For Midgard, I have written a NIR lowering pass to implement the same,
> which enables the lowered instructions themselves to be optimised.
>
> At the moment, this pass lives inside the (downstream) Midgard compiler.
> In the future, it will be necessary for the Bifrost compiler as well,
> should that use NIR. That said, Bifrost will share the same Gallium
> driver, so the pass could still live in the driver
> (src/gallium/drivers/panfrost).
>
> Should this pass be moved into common code (src/compiler/nir)? If so,
> what would the driver agnostic way of passing viewport parameters be?
> Both vc4 and Midgard currently use/will use special uniforms for this
> purpose. Similarly, is there a driver agnostic way of representing the
> transformed write? The Midgard pass emits `nir_instrinic_store_output`
> for the final value, but I'm not sure if this is generalisable.
>

maybe look at the user_clip_plane lowering as an example, ie.
nir_intrinsic_load_user_clip_plane

there are a few other examples of sysval's that lower to driver_params
(ie. things driver stuffs in special uniforms, basically)

BR,
-R


> I can send the Midgard code as an RFC if there's interest.
>
> Thanks,
>
> -Alyssa
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Lowering viewport transformation in NIR

2018-05-03 Thread Alyssa Rosenzweig
Hi all,

Certain embedded GPUs do not implement coordinate transformation in
hardware. Instead, section 12.5 "Coordinate Transformation" of the ES
3.2 specification is implemented in the vertex shader itself. Relevant
examples include Midgard and vc4.

To handle this, a lowering pass is needed to convert gl_Position writes
to screen space writes. The vc4 driver lowers this in the backend IR;
however, I don't think the pass needs to be specialised to the backend.
For Midgard, I have written a NIR lowering pass to implement the same,
which enables the lowered instructions themselves to be optimised.

At the moment, this pass lives inside the (downstream) Midgard compiler.
In the future, it will be necessary for the Bifrost compiler as well,
should that use NIR. That said, Bifrost will share the same Gallium
driver, so the pass could still live in the driver
(src/gallium/drivers/panfrost).

Should this pass be moved into common code (src/compiler/nir)? If so,
what would the driver agnostic way of passing viewport parameters be?
Both vc4 and Midgard currently use/will use special uniforms for this
purpose. Similarly, is there a driver agnostic way of representing the
transformed write? The Midgard pass emits `nir_instrinic_store_output`
for the final value, but I'm not sure if this is generalisable.

I can send the Midgard code as an RFC if there's interest.

Thanks,

-Alyssa
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/17] i965/blorp: Also skip the fast clear if the clear color differs

2018-05-03 Thread Nanley Chery
If the aux state is CLEAR and clear color value has changed, only the
surface state must be updated. The bit-pattern in the aux buffer is
exactly the same.
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index c805dae42e6..01fb48100cc 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -1230,13 +1230,12 @@ do_single_blorp_clear(struct brw_context *brw, struct 
gl_framebuffer *fb,
  brw_meta_convert_fast_clear_color(brw, irb->mt,
>Color.ClearColor);
 
-  bool same_clear_color =
- !intel_miptree_set_clear_color(brw, irb->mt, clear_color);
+  intel_miptree_set_clear_color(brw, irb->mt, clear_color);
 
-  /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear
+  /* If the buffer is already in ISL_AUX_STATE_CLEAR, the clear
* is redundant and can be skipped.
*/
-  if (aux_state == ISL_AUX_STATE_CLEAR && same_clear_color)
+  if (aux_state == ISL_AUX_STATE_CLEAR)
  return;
 
   DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__,
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/17] i965/blorp: Disable BLORP clear color updates

2018-05-03 Thread Nanley Chery
With the previous patches, we now update the indirect clear color buffer
every time the clear color changes. Avoid redundant updates.
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 01fb48100cc..ba1a9e8e56e 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -1262,7 +1262,8 @@ do_single_blorp_clear(struct brw_context *brw, struct 
gl_framebuffer *fb,
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
 
   struct blorp_batch batch;
-  blorp_batch_init(>blorp, , brw, 0);
+  blorp_batch_init(>blorp, , brw,
+   BLORP_BATCH_NO_UPDATE_CLEAR_COLOR);
   blorp_fast_clear(, , isl_format,
level, irb->mt_layer, num_layers,
x0, y0, x1, y1);
@@ -1616,7 +1617,8 @@ intel_hiz_exec(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
   , start_layer, num_layers, isl_tmp);
 
struct blorp_batch batch;
-   blorp_batch_init(>blorp, , brw, 0);
+   blorp_batch_init(>blorp, , brw,
+BLORP_BATCH_NO_UPDATE_CLEAR_COLOR);
blorp_hiz_op(, , level, start_layer, num_layers, op);
blorp_batch_finish();
 
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/17] intel/blorp: Add a NO_UPDATE_CLEAR_COLOR batch flag

2018-05-03 Thread Nanley Chery
Allow callers to handle updating the indirect clear color buffer
themselves. This can reduce the number of clear color updates in the
case where a caller performs multiple fast clears with the same clear
color.
---
 src/intel/blorp/blorp.h   | 5 +
 src/intel/blorp/blorp_genX_exec.h | 6 --
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 4626f2f83c2..f22110bc840 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -72,6 +72,11 @@ enum blorp_batch_flags {
 
/* This flag indicates that the blorp call should be predicated. */
BLORP_BATCH_PREDICATE_ENABLE  = (1 << 1),
+
+   /* This flag indicates that blorp should *not* update the indirect clear
+* color buffer.
+*/
+   BLORP_BATCH_NO_UPDATE_CLEAR_COLOR = (1 << 2),
 };
 
 struct blorp_batch {
diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index 593521b95cc..446743b5910 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -1700,8 +1700,10 @@ blorp_update_clear_color(struct blorp_batch *batch,
 static void
 blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
 {
-   blorp_update_clear_color(batch, >dst, params->fast_clear_op);
-   blorp_update_clear_color(batch, >depth, params->hiz_op);
+   if (!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR)) {
+  blorp_update_clear_color(batch, >dst, params->fast_clear_op);
+  blorp_update_clear_color(batch, >depth, params->hiz_op);
+   }
 
 #if GEN_GEN >= 8
if (params->hiz_op != ISL_AUX_OP_NONE) {
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/17] i965: Use set_clear_color for depth miptrees

2018-05-03 Thread Nanley Chery
Reduce code duplication now and prevent it in the following commits.
---
 src/mesa/drivers/dri/i965/brw_clear.c |  3 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 13 -
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  5 -
 3 files changed, 2 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index 3d540d6d905..2f61ea8ef15 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -213,7 +213,8 @@ brw_fast_clear_depth(struct gl_context *ctx)
  }
   }
 
-  intel_miptree_set_depth_clear_value(brw, mt, clear_value);
+  const union isl_color_value clear_color = { .f32 = {clear_value, } };
+  intel_miptree_set_clear_color(brw, mt, clear_color);
   same_clear_value = false;
}
 
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index da5b40df047..07ce2ac2adf 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -3745,19 +3745,6 @@ intel_miptree_set_clear_color(struct brw_context *brw,
return false;
 }
 
-bool
-intel_miptree_set_depth_clear_value(struct brw_context *brw,
-struct intel_mipmap_tree *mt,
-float clear_value)
-{
-   if (mt->fast_clear_color.f32[0] != clear_value) {
-  mt->fast_clear_color.f32[0] = clear_value;
-  brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
-  return true;
-   }
-   return false;
-}
-
 union isl_color_value
 intel_miptree_get_clear_color(const struct gen_device_info *devinfo,
   const struct intel_mipmap_tree *mt,
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index de4c3f4183a..9c25051979f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -741,11 +741,6 @@ intel_miptree_get_clear_color(const struct gen_device_info 
*devinfo,
   struct brw_bo **clear_color_bo,
   uint32_t *clear_color_offset);
 
-bool
-intel_miptree_set_depth_clear_value(struct brw_context *brw,
-struct intel_mipmap_tree *mt,
-float clear_value);
-
 #ifdef __cplusplus
 }
 #endif
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/17] i965/clear: Drop a stale comment in fast_clear_depth

2018-05-03 Thread Nanley Chery
This comment made more sense when it was above the calls to
intel_miptree_slice_set_needs_depth_resolve(). We stopped using these
functions at commit 554f7d6d02931ea45653c8872565d21c1678a6da
("i965: Move depth to the new resolve functions").
---
 src/mesa/drivers/dri/i965/brw_clear.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index a65839a0a05..24c8b242442 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -228,13 +228,9 @@ brw_fast_clear_depth(struct gl_context *ctx)
   }
}
 
-   /* Now, the HiZ buffer contains data that needs to be resolved to the depth
-* buffer.
-*/
intel_miptree_set_aux_state(brw, mt, depth_irb->mt_level,
depth_irb->mt_layer, num_layers,
ISL_AUX_STATE_CLEAR);
-
return true;
 }
 
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/17] i965: Prepare to delete intel_miptree_alloc_ccs()

2018-05-03 Thread Nanley Chery
We're going to delete intel_miptree_alloc_ccs() in the next commit. With
that in mind, replace the use of this function in
do_single_blorp_clear() with intel_miptree_alloc_aux() and move the
delayed allocation logic to it's callers.
---
 src/mesa/drivers/dri/i965/brw_blorp.c |  2 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  2 +-
 3 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index ba14136edc6..e6eedf3cedc 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -1209,7 +1209,7 @@ do_single_blorp_clear(struct brw_context *brw, struct 
gl_framebuffer *fb,
 */
if (can_fast_clear && !irb->mt->aux_buf) {
   assert(irb->mt->aux_usage == ISL_AUX_USAGE_CCS_D);
-  if (!intel_miptree_alloc_ccs(brw, irb->mt)) {
+  if (!intel_miptree_alloc_aux(brw, irb->mt)) {
  /* There are a few reasons in addition to out-of-memory, that can
   * cause intel_miptree_alloc_non_msrt_mcs to fail.  Try to recover by
   * falling back to non-fast clear.
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 1de381141ea..84be7c07c6f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -59,10 +59,6 @@ static void *intel_miptree_map_raw(struct brw_context *brw,
 
 static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
 
-static bool
-intel_miptree_alloc_aux(struct brw_context *brw,
-struct intel_mipmap_tree *mt);
-
 static bool
 intel_miptree_supports_mcs(struct brw_context *brw,
const struct intel_mipmap_tree *mt)
@@ -791,7 +787,8 @@ intel_miptree_create(struct brw_context *brw,
 
mt->offset = 0;
 
-   if (!intel_miptree_alloc_aux(brw, mt)) {
+   if (mt->aux_usage != ISL_AUX_USAGE_CCS_D &&
+   !intel_miptree_alloc_aux(brw, mt)) {
   intel_miptree_release();
   return NULL;
}
@@ -882,7 +879,8 @@ intel_miptree_create_for_bo(struct brw_context *brw,
if (!(flags & MIPTREE_CREATE_NO_AUX)) {
   intel_miptree_choose_aux_usage(brw, mt);
 
-  if (!intel_miptree_alloc_aux(brw, mt)) {
+  if (mt->aux_usage != ISL_AUX_USAGE_CCS_D &&
+  !intel_miptree_alloc_aux(brw, mt)) {
  intel_miptree_release();
  return NULL;
   }
@@ -1781,7 +1779,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
return true;
 }
 
-bool
+static bool
 intel_miptree_alloc_ccs(struct brw_context *brw,
 struct intel_mipmap_tree *mt)
 {
@@ -1902,7 +1900,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
  * create the auxiliary surfaces up-front.  CCS_D, on the other hand, can only
  * compress clear color so we wait until an actual fast-clear to allocate it.
  */
-static bool
+bool
 intel_miptree_alloc_aux(struct brw_context *brw,
 struct intel_mipmap_tree *mt)
 {
@@ -1924,11 +1922,6 @@ intel_miptree_alloc_aux(struct brw_context *brw,
   return true;
 
case ISL_AUX_USAGE_CCS_D:
-  /* Since CCS_D can only compress clear color so we wait until an actual
-   * fast-clear to allocate it.
-   */
-  return true;
-
case ISL_AUX_USAGE_CCS_E:
   assert(_mesa_is_format_color_format(mt->format));
   assert(mt->surf.samples == 1);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 8cea562dfa4..9a5d97bf348 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -379,7 +379,7 @@ struct intel_mipmap_tree
 };
 
 bool
-intel_miptree_alloc_ccs(struct brw_context *brw,
+intel_miptree_alloc_aux(struct brw_context *brw,
 struct intel_mipmap_tree *mt);
 
 enum intel_miptree_create_flags {
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/17] i965/clear: Remove an early return in fast_clear_depth

2018-05-03 Thread Nanley Chery
Reduce complexity and allow the next patch to delete some code. With
this change, clear operations will still be skipped and setting the
aux_state will cause no side-effects.

Remove the associated comment which implies an early return.

Reviewed-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_clear.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index 2f61ea8ef15..ba79447fc87 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -231,10 +231,6 @@ brw_fast_clear_depth(struct gl_context *ctx)
}
 
if (!need_clear) {
-  /* If all of the layers we intend to clear are already in the clear
-   * state then simply updating the miptree fast clear value is sufficient
-   * to change their clear value.
-   */
   if (devinfo->gen >= 10 && !same_clear_value) {
  /* Before gen10, it was enough to just update the clear value in the
   * miptree. But on gen10+, we let blorp update the clear value state
@@ -255,7 +251,6 @@ brw_fast_clear_depth(struct gl_context *ctx)
  }
  brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
   }
-  return true;
}
 
for (unsigned a = 0; a < num_layers; a++) {
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/17] i965: Update the indirect buffer in set_clear_color

2018-05-03 Thread Nanley Chery
Although BLORP currently does the update when performing a fast clear,
it's simpler to do it ourselves. Remove the dependency on BLORP.
---
 src/mesa/drivers/dri/i965/brw_clear.c | 37 ---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 13 ++
 2 files changed, 13 insertions(+), 37 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index ba79447fc87..a65839a0a05 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -108,7 +108,6 @@ brw_fast_clear_depth(struct gl_context *ctx)
struct intel_mipmap_tree *mt = depth_irb->mt;
struct gl_renderbuffer_attachment *depth_att = 
>Attachment[BUFFER_DEPTH];
const struct gen_device_info *devinfo = >screen->devinfo;
-   bool same_clear_value = true;
 
if (devinfo->gen < 6)
   return false;
@@ -215,42 +214,6 @@ brw_fast_clear_depth(struct gl_context *ctx)
 
   const union isl_color_value clear_color = { .f32 = {clear_value, } };
   intel_miptree_set_clear_color(brw, mt, clear_color);
-  same_clear_value = false;
-   }
-
-   bool need_clear = false;
-   for (unsigned a = 0; a < num_layers; a++) {
-  enum isl_aux_state aux_state =
- intel_miptree_get_aux_state(mt, depth_irb->mt_level,
- depth_irb->mt_layer + a);
-
-  if (aux_state != ISL_AUX_STATE_CLEAR) {
- need_clear = true;
- break;
-  }
-   }
-
-   if (!need_clear) {
-  if (devinfo->gen >= 10 && !same_clear_value) {
- /* Before gen10, it was enough to just update the clear value in the
-  * miptree. But on gen10+, we let blorp update the clear value state
-  * buffer when doing a fast clear. Since we are skipping the fast
-  * clear here, we need to update the clear color ourselves.
-  */
- uint32_t clear_offset = mt->aux_buf->clear_color_offset;
- union isl_color_value clear_color = { .f32 = { clear_value, } };
-
- /* We can't update the clear color while the hardware is still using
-  * the previous one for a resolve or sampling from it. So make sure
-  * that there's no pending commands at this point.
-  */
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
- for (int i = 0; i < 4; i++) {
-brw_store_data_imm32(brw, mt->aux_buf->clear_color_bo,
- clear_offset + i * 4, clear_color.u32[i]);
- }
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
-  }
}
 
for (unsigned a = 0; a < num_layers; a++) {
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 07ce2ac2adf..bd4ddbc2f58 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -3739,6 +3739,19 @@ intel_miptree_set_clear_color(struct brw_context *brw,
 {
if (memcmp(>fast_clear_color, _color, sizeof(clear_color)) != 0) {
   mt->fast_clear_color = clear_color;
+  if (mt->aux_buf->clear_color_bo) {
+ /* We can't update the clear color while the hardware is still using
+  * the previous one for a resolve or sampling from it. Make sure that
+  * there are no pending commands at this point.
+  */
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+ for (int i = 0; i < 4; i++) {
+brw_store_data_imm32(brw, mt->aux_buf->clear_color_bo,
+ mt->aux_buf->clear_color_offset + i * 4,
+ mt->fast_clear_color.u32[i]);
+ }
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+  }
   brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
   return true;
}
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/17] Revert "i965: Make the miptree clear color setter take a gl_color_union"

2018-05-03 Thread Nanley Chery
This reverts commit 1d94aa19877fb702ffacacde28ad7253cce72c97.

The next patch will make depth miptrees use the clear color setter that
was originally being used for color miptrees. Go back to using the
isl_color_value parameter because it's the same type as the
fast_clear_color field used by color and depth miptrees.
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 5 -
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 6 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 2 +-
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index e6eedf3cedc..c805dae42e6 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -1226,9 +1226,12 @@ do_single_blorp_clear(struct brw_context *brw, struct 
gl_framebuffer *fb,
if (can_fast_clear) {
   const enum isl_aux_state aux_state =
  intel_miptree_get_aux_state(irb->mt, irb->mt_level, irb->mt_layer);
+  union isl_color_value clear_color =
+ brw_meta_convert_fast_clear_color(brw, irb->mt,
+   >Color.ClearColor);
 
   bool same_clear_color =
- !intel_miptree_set_clear_color(brw, irb->mt, >Color.ClearColor);
+ !intel_miptree_set_clear_color(brw, irb->mt, clear_color);
 
   /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear
* is redundant and can be skipped.
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 08976d2680f..da5b40df047 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -36,7 +36,6 @@
 
 #include "brw_blorp.h"
 #include "brw_context.h"
-#include "brw_meta_util.h"
 #include "brw_state.h"
 
 #include "main/enums.h"
@@ -3736,11 +3735,8 @@ get_isl_dim_layout(const struct gen_device_info *devinfo,
 bool
 intel_miptree_set_clear_color(struct brw_context *brw,
   struct intel_mipmap_tree *mt,
-  const union gl_color_union *color)
+  union isl_color_value clear_color)
 {
-   const union isl_color_value clear_color =
-  brw_meta_convert_fast_clear_color(brw, mt, color);
-
if (memcmp(>fast_clear_color, _color, sizeof(clear_color)) != 0) {
   mt->fast_clear_color = clear_color;
   brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index d8d69698f54..de4c3f4183a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -731,7 +731,7 @@ intel_miptree_sample_with_hiz(struct brw_context *brw,
 bool
 intel_miptree_set_clear_color(struct brw_context *brw,
   struct intel_mipmap_tree *mt,
-  const union gl_color_union *color);
+  union isl_color_value clear_color);
 
 /* Get a clear color suitable for filling out an ISL surface state. */
 union isl_color_value
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/17] i965/miptree: Unify aux buffer allocation

2018-05-03 Thread Nanley Chery
There isn't much that changes between the aux allocation functions.
Remove the duplicated code.
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 227 +++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |   9 -
 2 files changed, 95 insertions(+), 141 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 84be7c07c6f..08976d2680f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1736,95 +1736,9 @@ intel_alloc_aux_buffer(struct brw_context *brw,
return buf;
 }
 
-static bool
-intel_miptree_alloc_mcs(struct brw_context *brw,
-struct intel_mipmap_tree *mt,
-GLuint num_samples)
-{
-   assert(brw->screen->devinfo.gen >= 7); /* MCS only used on Gen7+ */
-   assert(mt->aux_buf == NULL);
-   assert(mt->aux_usage == ISL_AUX_USAGE_MCS);
-
-   /* Multisampled miptrees are only supported for single level. */
-   assert(mt->first_level == 0);
-   enum isl_aux_state **aux_state =
-  create_aux_state_map(mt, ISL_AUX_STATE_CLEAR);
-   if (!aux_state)
-  return false;
-
-   struct isl_surf temp_mcs_surf;
-
-   MAYBE_UNUSED bool ok =
-  isl_surf_get_mcs_surf(>isl_dev, >surf, _mcs_surf);
-   assert(ok);
-
-   /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
-*
-* When MCS buffer is enabled and bound to MSRT, it is required that it
-* is cleared prior to any rendering.
-*
-* Since we don't use the MCS buffer for any purpose other than rendering,
-* it makes sense to just clear it immediately upon allocation.
-*
-* Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
-*/
-   mt->aux_buf = intel_alloc_aux_buffer(brw, _mcs_surf, true, 0xFF);
-   if (!mt->aux_buf) {
-  free(aux_state);
-  return false;
-   }
-
-   mt->aux_state = aux_state;
-
-   return true;
-}
-
-static bool
-intel_miptree_alloc_ccs(struct brw_context *brw,
-struct intel_mipmap_tree *mt)
-{
-   assert(mt->aux_buf == NULL);
-   assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E ||
-  mt->aux_usage == ISL_AUX_USAGE_CCS_D);
-
-   struct isl_surf temp_ccs_surf;
-
-   if (!isl_surf_get_ccs_surf(>isl_dev, >surf, _ccs_surf, 0))
-  return false;
-
-   assert(temp_ccs_surf.size &&
-  (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));
-
-   enum isl_aux_state **aux_state =
-  create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH);
-   if (!aux_state)
-  return false;
-
-   /* When CCS_E is used, we need to ensure that the CCS starts off in a valid
-* state.  From the Sky Lake PRM, "MCS Buffer for Render Target(s)":
-*
-*"If Software wants to enable Color Compression without Fast clear,
-*Software needs to initialize MCS with zeros."
-*
-* A CCS value of 0 indicates that the corresponding block is in the
-* pass-through state which is what we want.
-*
-* For CCS_D, do the same thing. On gen9+, this avoids having any undefined
-* bits in the aux buffer.
-*/
-   mt->aux_buf = intel_alloc_aux_buffer(brw, _ccs_surf, true, 0);
-   if (!mt->aux_buf) {
-  free(aux_state);
-  return false;
-   }
-
-   mt->aux_state = aux_state;
-
-   return true;
-}
 
 /**
- * Helper for intel_miptree_alloc_hiz() that sets
+ * Helper for intel_miptree_alloc_aux() that sets
  * \c mt->level[level].has_hiz. Return true if and only if
  * \c has_hiz was set.
  */
@@ -1859,37 +1773,78 @@ intel_miptree_level_enable_hiz(struct brw_context *brw,
return true;
 }
 
-bool
-intel_miptree_alloc_hiz(struct brw_context *brw,
-   struct intel_mipmap_tree *mt)
-{
-   assert(mt->aux_buf == NULL);
-   assert(mt->aux_usage == ISL_AUX_USAGE_HIZ);
 
-   enum isl_aux_state **aux_state =
-  create_aux_state_map(mt, ISL_AUX_STATE_AUX_INVALID);
-   if (!aux_state)
-  return false;
+/* Returns true iff all params are successfully filled. */
+static bool
+get_aux_buf_params(const struct brw_context *brw,
+   const struct intel_mipmap_tree *mt,
+   enum isl_aux_state *initial_state,
+   uint8_t *memset_value,
+   struct isl_surf *aux_surf)
+{
+   assert(initial_state && memset_value && aux_surf);
 
-   struct isl_surf temp_hiz_surf;
+   switch (mt->aux_usage) {
+   case ISL_AUX_USAGE_NONE:
+  aux_surf->size = 0;
+  return true;
+   case ISL_AUX_USAGE_HIZ:
+  assert(!_mesa_is_format_color_format(mt->format));
 
-   MAYBE_UNUSED bool ok =
-  isl_surf_get_hiz_surf(>isl_dev, >surf, _hiz_surf);
-   assert(ok);
+  *initial_state = ISL_AUX_STATE_AUX_INVALID;
+  {
+ MAYBE_UNUSED bool ok =
+isl_surf_get_hiz_surf(>isl_dev, >surf, aux_surf);
+ assert(ok);
+  }
+  return true;
+   case ISL_AUX_USAGE_MCS:
+  assert(_mesa_is_format_color_format(mt->format));
+  

[Mesa-dev] [PATCH 06/17] i965/miptree: Drop the alloc_flags param from alloc_aux_buffer

2018-05-03 Thread Nanley Chery
We have enough information to determine the optimal flags internally.
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 29 +--
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 566ead0d5c8..e065c2f62e0 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1661,7 +1661,6 @@ intel_miptree_copy_teximage(struct brw_context *brw,
 static struct intel_miptree_aux_buffer *
 intel_alloc_aux_buffer(struct brw_context *brw,
const struct isl_surf *aux_surf,
-   uint32_t alloc_flags,
bool wants_memset,
uint8_t memset_value,
struct intel_mipmap_tree *mt)
@@ -1685,6 +1684,17 @@ intel_alloc_aux_buffer(struct brw_context *brw,
buf->pitch = aux_surf->row_pitch;
buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);
 
+   /* If the buffer needs to be initialised (requiring the buffer to be
+* immediately mapped to cpu space for writing), do not use the gpu access
+* flag which can cause an unnecessary delay if the backing pages happened
+* to be just used by the GPU.
+*/
+   const bool alloc_zeroed = wants_memset && memset_value == 0;
+   const bool needs_memset =
+  !alloc_zeroed && (wants_memset || has_indirect_clear);
+   const uint32_t alloc_flags =
+  alloc_zeroed ? BO_ALLOC_ZEROED : (needs_memset ? 0 : BO_ALLOC_BUSY);
+
/* ISL has stricter set of alignment rules then the drm allocator.
 * Therefore one can pass the ISL dimensions in terms of bytes instead of
 * trying to recalculate based on different format block sizes.
@@ -1697,7 +1707,6 @@ intel_alloc_aux_buffer(struct brw_context *brw,
}
 
/* Initialize the bo to the desired value */
-   const bool needs_memset = wants_memset || has_indirect_clear;
if (needs_memset) {
   assert(!(alloc_flags & BO_ALLOC_BUSY));
 
@@ -1752,12 +1761,6 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
   isl_surf_get_mcs_surf(>isl_dev, >surf, _mcs_surf);
assert(ok);
 
-   /* Buffer needs to be initialised requiring the buffer to be immediately
-* mapped to cpu space for writing. Therefore do not use the gpu access
-* flag which can cause an unnecessary delay if the backing pages happened
-* to be just used by the GPU.
-*/
-   const uint32_t alloc_flags = 0;
/* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
 *
 * When MCS buffer is enabled and bound to MSRT, it is required that it
@@ -1768,8 +1771,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
 *
 * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
 */
-   mt->aux_buf = intel_alloc_aux_buffer(brw, _mcs_surf,
-alloc_flags, true, 0xFF, mt);
+   mt->aux_buf = intel_alloc_aux_buffer(brw, _mcs_surf, true, 0xFF, mt);
if (!mt->aux_buf) {
   free(aux_state);
   return false;
@@ -1813,8 +1815,7 @@ intel_miptree_alloc_ccs(struct brw_context *brw,
 * For CCS_D, do the same thing. On gen9+, this avoids having any undefined
 * bits in the aux buffer.
 */
-   mt->aux_buf = intel_alloc_aux_buffer(brw, _ccs_surf,
-BO_ALLOC_ZEROED, false, 0, mt);
+   mt->aux_buf = intel_alloc_aux_buffer(brw, _ccs_surf, true, 0, mt);
if (!mt->aux_buf) {
   free(aux_state);
   return false;
@@ -1879,9 +1880,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
   isl_surf_get_hiz_surf(>isl_dev, >surf, _hiz_surf);
assert(ok);
 
-   const uint32_t alloc_flags = 0;
-   mt->aux_buf = intel_alloc_aux_buffer(brw, _hiz_surf,
-alloc_flags, false, 0, mt);
+   mt->aux_buf = intel_alloc_aux_buffer(brw, _hiz_surf, false, 0, mt);
 
if (!mt->aux_buf) {
   free(aux_state);
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/17] i965/miptree: Drop the mt param from alloc_aux_buffer

2018-05-03 Thread Nanley Chery
Drop an unused parameter.
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index e065c2f62e0..1de381141ea 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1662,8 +1662,7 @@ static struct intel_miptree_aux_buffer *
 intel_alloc_aux_buffer(struct brw_context *brw,
const struct isl_surf *aux_surf,
bool wants_memset,
-   uint8_t memset_value,
-   struct intel_mipmap_tree *mt)
+   uint8_t memset_value)
 {
struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
if (!buf)
@@ -1771,7 +1770,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
 *
 * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
 */
-   mt->aux_buf = intel_alloc_aux_buffer(brw, _mcs_surf, true, 0xFF, mt);
+   mt->aux_buf = intel_alloc_aux_buffer(brw, _mcs_surf, true, 0xFF);
if (!mt->aux_buf) {
   free(aux_state);
   return false;
@@ -1815,7 +1814,7 @@ intel_miptree_alloc_ccs(struct brw_context *brw,
 * For CCS_D, do the same thing. On gen9+, this avoids having any undefined
 * bits in the aux buffer.
 */
-   mt->aux_buf = intel_alloc_aux_buffer(brw, _ccs_surf, true, 0, mt);
+   mt->aux_buf = intel_alloc_aux_buffer(brw, _ccs_surf, true, 0);
if (!mt->aux_buf) {
   free(aux_state);
   return false;
@@ -1880,7 +1879,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
   isl_surf_get_hiz_surf(>isl_dev, >surf, _hiz_surf);
assert(ok);
 
-   mt->aux_buf = intel_alloc_aux_buffer(brw, _hiz_surf, false, 0, mt);
+   mt->aux_buf = intel_alloc_aux_buffer(brw, _hiz_surf, false, 0);
 
if (!mt->aux_buf) {
   free(aux_state);
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/17] i965/miptree: Zero-initialize CCS_D buffers

2018-05-03 Thread Nanley Chery
Before this patch, the aux_state was actually AUX_INVALID because the BO
was never defined. This was fine on single slice miptrees because we
would fast-clear the resource right after creation. For multi-slice
miptrees on SKL+ however, this results in undefined behavior when
accessing a non-base slice. Here's a specific example:

1) Fast clear level 0
   * Undefined CCS_D buffer allocated in "PASS_THROUGH" state.
   * Level 0 transitions to the CLEAR state.
2) Render to level 1
   * Level 1 may have a 2-bit pattern of 2's.
   * Rendering with a 2 in the CCS is undefined.

Cc: 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 377efae32c9..182a896e23a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1806,13 +1806,11 @@ intel_miptree_alloc_ccs(struct brw_context *brw,
 * A CCS value of 0 indicates that the corresponding block is in the
 * pass-through state which is what we want.
 *
-* For CCS_D, on the other hand, we don't care as we're about to perform a
-* fast-clear operation.  In that case, being hot in caches more useful.
+* For CCS_D, do the same thing. On gen9+, this avoids having any undefined
+* bits in the aux buffer.
 */
-   const uint32_t alloc_flags = mt->aux_usage == ISL_AUX_USAGE_CCS_E ?
-BO_ALLOC_ZEROED : BO_ALLOC_BUSY;
-   mt->aux_buf = intel_alloc_aux_buffer(brw, "ccs-miptree",
-_ccs_surf, alloc_flags, mt);
+   mt->aux_buf = intel_alloc_aux_buffer(brw, "ccs-miptree", _ccs_surf,
+BO_ALLOC_ZEROED, mt);
if (!mt->aux_buf) {
   free(aux_state);
   return false;
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/17] i965/miptree: Drop the name param from alloc_aux_buffer

2018-05-03 Thread Nanley Chery
A name of "aux-miptree" should be sufficient.
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index e70c9ff1ef4..566ead0d5c8 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1660,7 +1660,6 @@ intel_miptree_copy_teximage(struct brw_context *brw,
 
 static struct intel_miptree_aux_buffer *
 intel_alloc_aux_buffer(struct brw_context *brw,
-   const char *name,
const struct isl_surf *aux_surf,
uint32_t alloc_flags,
bool wants_memset,
@@ -1690,7 +1689,7 @@ intel_alloc_aux_buffer(struct brw_context *brw,
 * Therefore one can pass the ISL dimensions in terms of bytes instead of
 * trying to recalculate based on different format block sizes.
 */
-   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, buf->size,
+   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, "aux-miptree", buf->size,
 I915_TILING_Y, buf->pitch, alloc_flags);
if (!buf->bo) {
   free(buf);
@@ -1769,7 +1768,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
 *
 * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
 */
-   mt->aux_buf = intel_alloc_aux_buffer(brw, "mcs-miptree", _mcs_surf,
+   mt->aux_buf = intel_alloc_aux_buffer(brw, _mcs_surf,
 alloc_flags, true, 0xFF, mt);
if (!mt->aux_buf) {
   free(aux_state);
@@ -1814,7 +1813,7 @@ intel_miptree_alloc_ccs(struct brw_context *brw,
 * For CCS_D, do the same thing. On gen9+, this avoids having any undefined
 * bits in the aux buffer.
 */
-   mt->aux_buf = intel_alloc_aux_buffer(brw, "ccs-miptree", _ccs_surf,
+   mt->aux_buf = intel_alloc_aux_buffer(brw, _ccs_surf,
 BO_ALLOC_ZEROED, false, 0, mt);
if (!mt->aux_buf) {
   free(aux_state);
@@ -1881,7 +1880,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
assert(ok);
 
const uint32_t alloc_flags = 0;
-   mt->aux_buf = intel_alloc_aux_buffer(brw, "hiz-miptree", _hiz_surf,
+   mt->aux_buf = intel_alloc_aux_buffer(brw, _hiz_surf,
 alloc_flags, false, 0, mt);
 
if (!mt->aux_buf) {
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/17] i965/miptree: Initialize the indirect clear color to zero

2018-05-03 Thread Nanley Chery
The indirect clear color isn't correctly tracked in
intel_miptree::fast_clear_color. The initial value of ::fast_clear_color
is zero, while that of the indirect clear color is undefined or
non-zero.

Topi Pohjolainen discovered this issue with MCS buffers. This issue is
apparent when fast-clearing an MCS buffer for the first time with
glClearColor = {0.0,}. Although the indirect clear color is non-zero,
the initial aux state of the MCS is CLEAR and the tracked clear color is
zero, so we avoid updating the indirect clear color with {0.0,}.

Make the indirect clear color match the initial value of
::fast_clear_color.

---

Hey Topi,

Just FYI, this patch should fix the MCS bug you reported earlier.

 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 33 ++-
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 5d3ee569bd8..e70c9ff1ef4 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -978,11 +978,11 @@ create_ccs_buf_for_image(struct brw_context *brw,
 * system with CCS, we don't have the extra space at the end of the aux
 * buffer. So create a new bo here that will store that clear color.
 */
-   const struct gen_device_info *devinfo = >screen->devinfo;
-   if (devinfo->gen >= 10) {
+   if (brw->isl_dev.ss.clear_color_state_size > 0) {
   mt->aux_buf->clear_color_bo =
- brw_bo_alloc(brw->bufmgr, "clear_color_bo",
-  brw->isl_dev.ss.clear_color_state_size);
+ brw_bo_alloc_tiled(brw->bufmgr, "clear_color_bo",
+brw->isl_dev.ss.clear_color_state_size,
+I915_TILING_NONE, 0, BO_ALLOC_ZEROED);
   if (!mt->aux_buf->clear_color_bo) {
  free(mt->aux_buf);
  mt->aux_buf = NULL;
@@ -1673,9 +1673,9 @@ intel_alloc_aux_buffer(struct brw_context *brw,
 
buf->size = aux_surf->size;
 
-   const struct gen_device_info *devinfo = >screen->devinfo;
-   if (devinfo->gen >= 10) {
-  /* On CNL, instead of setting the clear color in the SURFACE_STATE, we
+   const bool has_indirect_clear = brw->isl_dev.ss.clear_color_state_size > 0;
+   if (has_indirect_clear) {
+  /* On CNL+, instead of setting the clear color in the SURFACE_STATE, we
* will set a pointer to a dword somewhere that contains the color. So,
* allocate the space for the clear color value here on the aux buffer.
*/
@@ -1698,7 +1698,8 @@ intel_alloc_aux_buffer(struct brw_context *brw,
}
 
/* Initialize the bo to the desired value */
-   if (wants_memset) {
+   const bool needs_memset = wants_memset || has_indirect_clear;
+   if (needs_memset) {
   assert(!(alloc_flags & BO_ALLOC_BUSY));
 
   void *map = brw_bo_map(brw, buf->bo, MAP_WRITE | MAP_RAW);
@@ -1706,11 +1707,21 @@ intel_alloc_aux_buffer(struct brw_context *brw,
  intel_miptree_aux_buffer_free(buf);
  return NULL;
   }
-  memset(map, memset_value, mt->aux_buf->size);
+
+  /* Memset the aux_surf portion of the BO. */
+  if (wants_memset)
+ memset(map, memset_value, aux_surf->size);
+
+  /* Zero the indirect clear color to match ::fast_clear_color. */
+  if (has_indirect_clear) {
+ memset((char *)map + buf->clear_color_offset, 0,
+brw->isl_dev.ss.clear_color_state_size);
+  }
+
   brw_bo_unmap(buf->bo);
}
 
-   if (devinfo->gen >= 10) {
+   if (has_indirect_clear) {
   buf->clear_color_bo = buf->bo;
   brw_bo_reference(buf->clear_color_bo);
}
@@ -1869,7 +1880,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
   isl_surf_get_hiz_surf(>isl_dev, >surf, _hiz_surf);
assert(ok);
 
-   const uint32_t alloc_flags = BO_ALLOC_BUSY;
+   const uint32_t alloc_flags = 0;
mt->aux_buf = intel_alloc_aux_buffer(brw, "hiz-miptree", _hiz_surf,
 alloc_flags, false, 0, mt);
 
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/17] i965/miptree: Move init_mcs into alloc_aux_buffer

2018-05-03 Thread Nanley Chery
Add infrastructure for initializing the clear color BO.
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 68 ---
 1 file changed, 31 insertions(+), 37 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 182a896e23a..5d3ee569bd8 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1658,41 +1658,13 @@ intel_miptree_copy_teximage(struct brw_context *brw,
intel_obj->needs_validate = true;
 }
 
-static bool
-intel_miptree_init_mcs(struct brw_context *brw,
-   struct intel_mipmap_tree *mt,
-   int init_value)
-{
-   assert(mt->aux_buf != NULL);
-
-   /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
-*
-* When MCS buffer is enabled and bound to MSRT, it is required that it
-* is cleared prior to any rendering.
-*
-* Since we don't use the MCS buffer for any purpose other than rendering,
-* it makes sense to just clear it immediately upon allocation.
-*
-* Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
-*/
-   void *map = brw_bo_map(brw, mt->aux_buf->bo, MAP_WRITE | MAP_RAW);
-   if (unlikely(map == NULL)) {
-  fprintf(stderr, "Failed to map mcs buffer into GTT\n");
-  intel_miptree_aux_buffer_free(mt->aux_buf);
-  mt->aux_buf = NULL;
-  return false;
-   }
-   void *data = map;
-   memset(data, init_value, mt->aux_buf->size);
-   brw_bo_unmap(mt->aux_buf->bo);
-   return true;
-}
-
 static struct intel_miptree_aux_buffer *
 intel_alloc_aux_buffer(struct brw_context *brw,
const char *name,
const struct isl_surf *aux_surf,
uint32_t alloc_flags,
+   bool wants_memset,
+   uint8_t memset_value,
struct intel_mipmap_tree *mt)
 {
struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
@@ -1725,6 +1697,19 @@ intel_alloc_aux_buffer(struct brw_context *brw,
   return NULL;
}
 
+   /* Initialize the bo to the desired value */
+   if (wants_memset) {
+  assert(!(alloc_flags & BO_ALLOC_BUSY));
+
+  void *map = brw_bo_map(brw, buf->bo, MAP_WRITE | MAP_RAW);
+  if (map == NULL) {
+ intel_miptree_aux_buffer_free(buf);
+ return NULL;
+  }
+  memset(map, memset_value, mt->aux_buf->size);
+  brw_bo_unmap(buf->bo);
+   }
+
if (devinfo->gen >= 10) {
   buf->clear_color_bo = buf->bo;
   brw_bo_reference(buf->clear_color_bo);
@@ -1763,10 +1748,19 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
 * to be just used by the GPU.
 */
const uint32_t alloc_flags = 0;
-   mt->aux_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
-_mcs_surf, alloc_flags, mt);
-   if (!mt->aux_buf ||
-   !intel_miptree_init_mcs(brw, mt, 0xFF)) {
+   /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
+*
+* When MCS buffer is enabled and bound to MSRT, it is required that it
+* is cleared prior to any rendering.
+*
+* Since we don't use the MCS buffer for any purpose other than rendering,
+* it makes sense to just clear it immediately upon allocation.
+*
+* Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
+*/
+   mt->aux_buf = intel_alloc_aux_buffer(brw, "mcs-miptree", _mcs_surf,
+alloc_flags, true, 0xFF, mt);
+   if (!mt->aux_buf) {
   free(aux_state);
   return false;
}
@@ -1810,7 +1804,7 @@ intel_miptree_alloc_ccs(struct brw_context *brw,
 * bits in the aux buffer.
 */
mt->aux_buf = intel_alloc_aux_buffer(brw, "ccs-miptree", _ccs_surf,
-BO_ALLOC_ZEROED, mt);
+BO_ALLOC_ZEROED, false, 0, mt);
if (!mt->aux_buf) {
   free(aux_state);
   return false;
@@ -1876,8 +1870,8 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
assert(ok);
 
const uint32_t alloc_flags = BO_ALLOC_BUSY;
-   mt->aux_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
-_hiz_surf, alloc_flags, mt);
+   mt->aux_buf = intel_alloc_aux_buffer(brw, "hiz-miptree", _hiz_surf,
+alloc_flags, false, 0, mt);
 
if (!mt->aux_buf) {
   free(aux_state);
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/17] i965/miptree: Fix handling of uninitialized MCS buffers

2018-05-03 Thread Nanley Chery
Before this patch, if we failed to initialize an MCS buffer, we'd
end up in a state in which the miptree thinks it has an MCS buffer,
but doesn't. We also leaked the clear_color_bo if it existed.

With this patch, we now free the miptree aux buffer resources and let
intel_miptree_alloc_mcs() know that the MCS buffer no longer exists.

Cc: 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b9a564552df..377efae32c9 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1658,7 +1658,7 @@ intel_miptree_copy_teximage(struct brw_context *brw,
intel_obj->needs_validate = true;
 }
 
-static void
+static bool
 intel_miptree_init_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt,
int init_value)
@@ -1678,13 +1678,14 @@ intel_miptree_init_mcs(struct brw_context *brw,
void *map = brw_bo_map(brw, mt->aux_buf->bo, MAP_WRITE | MAP_RAW);
if (unlikely(map == NULL)) {
   fprintf(stderr, "Failed to map mcs buffer into GTT\n");
-  brw_bo_unreference(mt->aux_buf->bo);
-  free(mt->aux_buf);
-  return;
+  intel_miptree_aux_buffer_free(mt->aux_buf);
+  mt->aux_buf = NULL;
+  return false;
}
void *data = map;
memset(data, init_value, mt->aux_buf->size);
brw_bo_unmap(mt->aux_buf->bo);
+   return true;
 }
 
 static struct intel_miptree_aux_buffer *
@@ -1764,15 +1765,14 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
const uint32_t alloc_flags = 0;
mt->aux_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
 _mcs_surf, alloc_flags, mt);
-   if (!mt->aux_buf) {
+   if (!mt->aux_buf ||
+   !intel_miptree_init_mcs(brw, mt, 0xFF)) {
   free(aux_state);
   return false;
}
 
mt->aux_state = aux_state;
 
-   intel_miptree_init_mcs(brw, mt, 0xFF);
-
return true;
 }
 
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/17] i965: Avoid a redundant color buffer fast-clear

2018-05-03 Thread Nanley Chery
My main motivation for this series is to avoid one type of redundant
fast-clears for color buffers (patch 15). In doing so, I take
opportunities to improve the maintainability and performance of the
affected code along the way.

* Fix bugs that this series depends on for correct behavior (1-2).
* Efficient updates to the indirect clear color requires correctly
  tracking the clear color on the CPU. For correctness, make the initial
  indirect clear color match the CPU-side one (3-4).
* We've been modifying code related to aux buffer allocation thus far.
  Take this opportunity to remove duplicated code (5-9).
* Move the code to update the indirect depth clear color to a unified
  location to enable the same optimization of skipping an already
  cleared buffer (regardless of the new value) for color buffers (10-13).
* We've been modifying depth clear code thus far. Take this opportunity
  to delete a stale comment we found here (14).
* Now that we correctly track the indirect clear color and updated it as
  needed, enable the optimization of skipping a fast-clear on an already
  cleared color buffer, even if the clear color differs from the current
  one (15).
* Now that we update the indirect clear color in the clear color setter,
  avoid the redundant updates done by BLORP (16-17).

-Nanley

Nanley Chery (17):
  i965/miptree: Fix handling of uninitialized MCS buffers
  i965/miptree: Zero-initialize CCS_D buffers
  i965/miptree: Move init_mcs into alloc_aux_buffer
  i965/miptree: Initialize the indirect clear color to zero
  i965/miptree: Drop the name param from alloc_aux_buffer
  i965/miptree: Drop the alloc_flags param from alloc_aux_buffer
  i965/miptree: Drop the mt param from alloc_aux_buffer
  i965: Prepare to delete intel_miptree_alloc_ccs()
  i965/miptree: Unify aux buffer allocation
  Revert "i965: Make the miptree clear color setter take a
gl_color_union"
  i965: Use set_clear_color for depth miptrees
  i965/clear: Remove an early return in fast_clear_depth
  i965: Update the indirect buffer in set_clear_color
  i965/clear: Drop a stale comment in fast_clear_depth
  i965/blorp: Also skip the fast clear if the clear color differs
  intel/blorp: Add a NO_UPDATE_CLEAR_COLOR batch flag
  i965/blorp: Disable BLORP clear color updates

 src/intel/blorp/blorp.h   |   5 +
 src/intel/blorp/blorp_genX_exec.h |   6 +-
 src/mesa/drivers/dri/i965/brw_blorp.c |  18 +-
 src/mesa/drivers/dri/i965/brw_clear.c |  49 +---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 362 +++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  18 +-
 6 files changed, 181 insertions(+), 277 deletions(-)

-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 00/18] anv: add shaderInt16 support

2018-05-03 Thread Clayton Craft
Quoting Iago Toral Quiroga (2018-04-30 07:18:08)
> This version addresses the feedback received to v1, which includes moving the
> bit-size lowering pass from intel to core NIR (patch 8) and a separate patch
> to add Intel's specific configuration for int16 (patch 9), and then it also
> adds a few things that were missing in the first version, namely, a fix for
> 16-bit comparisons to emit 32-bit booleans (patch 10 -a patch to optimize the
> resulting code will come later-) and 16-bit pack/unpack which is needed for
> 16-bit bitcasts (patches 11-15).
> 
> Patches 6-15 need review, the rest (1-5 and 16-18), have already been reviewed
> and don't have changes.
> 
> A branch with the series is available for testing in the
> 'itoral/shaderInt16ForReview_v2' branch of the Igalia mesa repository at
> github:
> 
> https://github.com/Igalia/mesa/tree/itoral/shaderInt16ForReview_v2
> 
> Iago Toral Quiroga (16):
>   intel/compiler: fix isign for 16-bit integers
>   i965/compiler: handle conversion to smaller type in the lowering pass
> for that
>   intel/compiler: implement conversion between float/int 16-bit types
>   intel/compiler: implement conversions from 16-bit int/float to bool
>   intel/compiler: fix brw_imm_w for negative 16-bit integers
>   compiler/nir: add a lowering pass to convert the bit size of ALU
> operations
>   intel/compiler: lower some 16-bit integer operations to 32-bit
>   intel/compiler: fix 16-bit comparisons
>   nir: add opcodes for 16-bit packing and unpacking
>   nir/lower_64bit_packing: extend the pass to handle packing from / to
> 16-bit.
>   compiler/lower_64bit_packing: rename the pass to be more generic
>   compiler/spirv: implement 16-bit bitcasts
>   intel/compiler: implement 16-bit pack/unpack opcodes
>   compiler/spirv: add implementation to check for SpvCapabilityInt16
> support
>   anv/pipeline: support SpvCapabilityInt16 in gen8+
>   anv/device: expose shaderInt16 support in gen8+
> 
> Jose Maria Casanova Crespo (2):
>   intel/compiler: implement nir_instr_type_load_const for 16-bit
> constants
>   intel/compiler: fix brw_negate_immediate for 16-bit types
> 
>  src/amd/vulkan/radv_shader.c   |   2 +-
>  src/compiler/Makefile.sources  |   3 +-
>  src/compiler/nir/meson.build   |   3 +-
>  src/compiler/nir/nir.h |   8 +-
>  src/compiler/nir/nir_lower_bit_size.c  | 127 
> +
>  ...r_lower_64bit_packing.c => nir_lower_packing.c} |  70 ++--
>  src/compiler/nir/nir_opcodes.py|  19 +++
>  src/compiler/shader_info.h |   1 +
>  src/compiler/spirv/spirv_to_nir.c  |   4 +-
>  src/compiler/spirv/vtn_alu.c   |  31 +++--
>  src/intel/compiler/brw_fs_lower_conversions.cpp|   5 +-
>  src/intel/compiler/brw_fs_nir.cpp  | 100 +++-
>  src/intel/compiler/brw_nir.c   |  23 +++-
>  src/intel/compiler/brw_reg.h   |   2 +-
>  src/intel/compiler/brw_shader.cpp  |  11 +-
>  src/intel/vulkan/anv_device.c  |   2 +-
>  src/intel/vulkan/anv_pipeline.c|   1 +
>  src/mesa/state_tracker/st_glsl_to_nir.cpp  |   2 +-
>  18 files changed, 356 insertions(+), 58 deletions(-)
>  create mode 100644 src/compiler/nir/nir_lower_bit_size.c
>  rename src/compiler/nir/{nir_lower_64bit_packing.c => nir_lower_packing.c} 
> (56%)
> 
> -- 
> 2.14.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Since this patch series was merged, we are seeing a number of failures in CI on
BSW, GLK, and BXT platforms:

dEQP-VK.spirv_assembly.instruction.compute.sconvert.int16_to_int64
dEQP-VK.spirv_assembly.instruction.compute.uconvert.uint16_to_uint64
dEQP-VK.spirv_assembly.instruction.compute.sconvert.int16_to_uint64


fdo bug: #106389


Output from tests is:

dEQP-VK.spirv_assembly.instruction.compute.sconvert.int16_to_int64:
  INTEL-MESA: error: ../src/intel/vulkan/anv_queue.c:275:
  VK_ERROR_OUT_OF_HOST_MEMORY
  INTEL-MESA: error: ../src/intel/vulkan/genX_state.c:247:
  VK_ERROR_OUT_OF_HOST_MEMORY
  deqp-vk: ../src/intel/compiler/brw_fs_generator.cpp:2482: int
  fs_generator::generate_code(const cfg_t*, int): Assertion `validated' failed.

dEQP-VK.spirv_assembly.instruction.compute.uconvert.uint16_to_uint64:
  INTEL-MESA: warning: ../src/intel/vulkan/anv_device.c:1034: FINISHME: 
Implement
  pop-free point clipping
  INTEL-MESA: debug: anv_GetPhysicalDeviceProperties2: ignored VkStructureType
  1000145002
  INTEL-MESA: debug: anv_GetPhysicalDeviceProperties2: ignored VkStructureType
  1000145002
  INTEL-MESA: error: ../src/intel/vulkan/anv_device.c:2602:
  VK_ERROR_OUT_OF_HOST_MEMORY
  INTEL-MESA: error: ../src/intel/vulkan/anv_image.c:581:
  

Re: [Mesa-dev] [ANNOUNCE] mesa 18.1.0-rc2

2018-05-03 Thread Emil Velikov
On 3 May 2018 at 18:22, Dylan Baker  wrote:
> Quoting Emil Velikov (2018-05-03 03:40:47)
>> Hi Dylan,
>>
>> A few small nitpicks:
>>
>> On 27 April 2018 at 22:07, Dylan Baker  wrote:
>> > Hi List,
>> >
>> s/List/list/
>>
>> > Mesa 18.1.0-rc2 is now available. There are 20 nominated patches, and no 
>> > queued
>> > or rejected patches. All patches applied cleanly, so no conflicts at all. 
>> > Yay.
>> >
>> Please follow the existing template or suggest improvements if
>> something feels unusual.
>>
>> Here - keep queued/nominated/rejected summary numbers on separate lines.
>> It cuts parse time to 0.5s ;-)
>>
>> >
>> > git tag: mesa-18.1.0-rc2
>> >
>> This should be just above the tarballs
>>
>> >
>> > Nominated (20)
>> > ==
>> >
>>
>> > Jason Ekstrand (2):
>> >
>> >   • i965/fs: Return mlen Android.common.mk Android.mk appveyor.yml 
>> > autogen.sh
>> > bin build-support changes.html CleanSpec.mk common.py configure.ac docs
>> > doxygen include install-gallium-links.mk install-lib-links.mk m4
>> > Makefile.am mesa-18.1.0-devel meson.build meson_options.txt REVIEWERS 
>> > scons
>> > SConstruct scripts src subprojects VERSION 8 for size_read() for
>> > INTERPOLATE_AT_*
>> Seems like you're using some homegrown script for the patch list.
>
> This is the output of the bin/shortlog_mesa.sh run though w3m to convert it to
> text.
>
The scripts documentation says "... in the release notes _files_,
_with HTML formatting_."

Or in other words - shortlog_mesa.sh (and bugzilla_mesa.sh) are used
only for the html page.

>>
>> You want to use git shortlog instead - it provides a consistent
>> experience and doesn't have the bugs hit above.
>> Don't forget to send patches for the releasing documentation. Be that
>> removing unnecessary fluff or adding missing bits of info.
>
> I have branch, but I wanted to wait until we got to the release, just so I 
> had a
> chance to polish and add things as I came on them.
>
Looking forward to the patches.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [ANNOUNCE] mesa 18.1.0-rc2

2018-05-03 Thread Dylan Baker
Quoting Emil Velikov (2018-05-03 03:40:47)
> Hi Dylan,
> 
> A few small nitpicks:
> 
> On 27 April 2018 at 22:07, Dylan Baker  wrote:
> > Hi List,
> >
> s/List/list/
> 
> > Mesa 18.1.0-rc2 is now available. There are 20 nominated patches, and no 
> > queued
> > or rejected patches. All patches applied cleanly, so no conflicts at all. 
> > Yay.
> >
> Please follow the existing template or suggest improvements if
> something feels unusual.
> 
> Here - keep queued/nominated/rejected summary numbers on separate lines.
> It cuts parse time to 0.5s ;-)
> 
> >
> > git tag: mesa-18.1.0-rc2
> >
> This should be just above the tarballs
> 
> >
> > Nominated (20)
> > ==
> >
> 
> > Jason Ekstrand (2):
> >
> >   • i965/fs: Return mlen Android.common.mk Android.mk appveyor.yml 
> > autogen.sh
> > bin build-support changes.html CleanSpec.mk common.py configure.ac docs
> > doxygen include install-gallium-links.mk install-lib-links.mk m4
> > Makefile.am mesa-18.1.0-devel meson.build meson_options.txt REVIEWERS 
> > scons
> > SConstruct scripts src subprojects VERSION 8 for size_read() for
> > INTERPOLATE_AT_*
> Seems like you're using some homegrown script for the patch list.

This is the output of the bin/shortlog_mesa.sh run though w3m to convert it to
text.

> 
> You want to use git shortlog instead - it provides a consistent
> experience and doesn't have the bugs hit above.
> Don't forget to send patches for the releasing documentation. Be that
> removing unnecessary fluff or adding missing bits of info.

I have branch, but I wanted to wait until we got to the release, just so I had a
chance to polish and add things as I came on them.

> -Emil



signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa/st/cb_clear: in st_Clear also validate the render state (needed by virgl)

2018-05-03 Thread Gert Wollny
Am Donnerstag, den 03.05.2018, 13:24 -0400 schrieb Ilia Mirkin:
> 
> The api call is "clear", not "glClear in the context of whatever
> random GL state there might be". When the gallium clear API is
> invoked, the bound framebuffer needs to be cleared. This is how the
> API works, this is how all drivers implement it. It's basically
> memset(). It doesn't care about rasterizer discard or anything else.
> 

I stand corrected and sorry for the noise.

Best,
Gert

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] egl: check if colorspace/surface type is supported

2018-05-03 Thread Emil Velikov
On 2 May 2018 at 17:23, Juan A. Suarez Romero  wrote:
> According to EGL 1.4 spec, section 3.5.1 ("Creating On-Screen Rendering
> Surfaces"), if config does not support the colorspace or alpha format
> attributes specified in attrib_list (as defined for
> eglCreateWindowSurface), an EGL_BAD_MATCH error is generated.
>
> This fixes dEQP-EGL.functional.wide_color.*_888_colorspace_srgb (still
> not merged,
> https://android-review.googlesource.com/c/platform/external/deqp/+/667322),
> which is crashing when trying to create a windows surface with RGB888
> configuration and sRGB colorspace.
>
> v2: Handle the fix in other backends (Tapani)
> ---
>  src/egl/drivers/dri2/platform_drm.c  | 5 +
>  src/egl/drivers/dri2/platform_wayland.c  | 6 ++
>  src/egl/drivers/dri2/platform_x11.c  | 5 +
>  src/egl/drivers/dri2/platform_x11_dri3.c | 5 +
>  4 files changed, 21 insertions(+)
>
> diff --git a/src/egl/drivers/dri2/platform_drm.c 
> b/src/egl/drivers/dri2/platform_drm.c
> index dc4efea9103..35bc4b5b1ac 100644
> --- a/src/egl/drivers/dri2/platform_drm.c
> +++ b/src/egl/drivers/dri2/platform_drm.c
> @@ -155,6 +155,11 @@ dri2_drm_create_window_surface(_EGLDriver *drv, 
> _EGLDisplay *disp,
> config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
>  dri2_surf->base.GLColorspace);
>
> +   if (!config) {
> +  _eglError(EGL_BAD_MATCH, "Unsupported surfacetype/colorspace 
> configuration");

Seems like android and surfaceless are missing the EGL_BAD_MATCH bit.
AFAICT they need it right?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] amd/common: use llvm.amdgcn.wqm for explicit derivatives

2018-05-03 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Thu, May 3, 2018 at 5:18 PM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> To comply with an upcoming change in LLVM, see
> https://reviews.llvm.org/D46051
> ---
>  src/amd/common/ac_llvm_build.c | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index f21a5d2623c..c9b2e36b632 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1241,20 +1241,27 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
> trbl = ac_build_intrinsic(ctx,
> "llvm.amdgcn.ds.swizzle", ctx->i32,
> args, 2,
> AC_FUNC_ATTR_READNONE |
> AC_FUNC_ATTR_CONVERGENT);
> }
>
> tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
> trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
> result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
> +
> +   if (HAVE_LLVM >= 0x0700) {
> +   result = ac_build_intrinsic(ctx,
> +   "llvm.amdgcn.wqm.f32", ctx->f32,
> +   , 1, 0);
> +   }
> +
> return result;
>  }
>
>  void
>  ac_build_sendmsg(struct ac_llvm_context *ctx,
>  uint32_t msg,
>  LLVMValueRef wave_id)
>  {
> LLVMValueRef args[2];
> args[0] = LLVMConstInt(ctx->i32, msg, false);
> --
> 2.14.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa/st/cb_clear: in st_Clear also validate the render state (needed by virgl)

2018-05-03 Thread Ilia Mirkin
On Thu, May 3, 2018 at 1:15 PM, Gert Wollny  wrote:
> Am Donnerstag, den 03.05.2018, 12:51 -0400 schrieb Ilia Mirkin:
>>
>> virgl_clear implements a gallium API call which is meant to clear the
>> surface. It sounds like virglrenderer does not properly implement
>> that
>> call. Either workaround your buggy "hardware" (i.e. virglrenderer) by
>> throwing in an explicit "no, really, do this" call, or fix
>> virglrenderer to do the clear irrespective of what the last-set
>> rasterizer discard state might be. I'd recommend the latter (i.e.
>> fixing this in virglrenderer).
>>
>
> IMNSHO doing a clear regardless of the last-set rasterizer discard is
> just replacing one bug with another. Besides, there is another CTS test
> that apparently gets fixed by this patch
>
> dEQP-GLES3.functional.shaders.indexing.
>moredynamic.with_value_from_indexing_expression_fragment
>
> and this doesn't use rasterizer discard.
>
> In any case, virglrendere doesn't know about the state change because
> the client (virgl) doesn't send the state change before the clear. This
> is the bug.
>
> My problem is to fix this in the right place I need access to the
> current render state in virgl_clear so that I can send the updated
> state like it is done for other hardware when they call the blitter
> (and that's why I put an RFC into the subject, in the hope that someone
> who is more knowledgabe with the data structures used by virgl could
> give me a hint).
>
> Another option would be to always send state changes to the host,
> regardless of whether they will be used or not, but this seems to me
> like a waste of cycles.

The api call is "clear", not "glClear in the context of whatever
random GL state there might be". When the gallium clear API is
invoked, the bound framebuffer needs to be cleared. This is how the
API works, this is how all drivers implement it. It's basically
memset(). It doesn't care about rasterizer discard or anything else.

As long as you're not implementing that API, you're going to have problems.

Sounds like the other test is also fixed with extra flushing, which
means that you have some other piece of state not being properly
tracked.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa/st/cb_clear: in st_Clear also validate the render state (needed by virgl)

2018-05-03 Thread Gert Wollny
Am Donnerstag, den 03.05.2018, 12:51 -0400 schrieb Ilia Mirkin:
> 
> virgl_clear implements a gallium API call which is meant to clear the
> surface. It sounds like virglrenderer does not properly implement
> that
> call. Either workaround your buggy "hardware" (i.e. virglrenderer) by
> throwing in an explicit "no, really, do this" call, or fix
> virglrenderer to do the clear irrespective of what the last-set
> rasterizer discard state might be. I'd recommend the latter (i.e.
> fixing this in virglrenderer).
> 

IMNSHO doing a clear regardless of the last-set rasterizer discard is
just replacing one bug with another. Besides, there is another CTS test
that apparently gets fixed by this patch 

dEQP-GLES3.functional.shaders.indexing.
   moredynamic.with_value_from_indexing_expression_fragment

and this doesn't use rasterizer discard. 

In any case, virglrendere doesn't know about the state change because
the client (virgl) doesn't send the state change before the clear. This
is the bug.

My problem is to fix this in the right place I need access to the
current render state in virgl_clear so that I can send the updated
state like it is done for other hardware when they call the blitter
(and that's why I put an RFC into the subject, in the hope that someone
who is more knowledgabe with the data structures used by virgl could
give me a hint). 

Another option would be to always send state changes to the host,
regardless of whether they will be used or not, but this seems to me
like a waste of cycles.

Best,
Gert

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: fix printing of pixld

2018-05-03 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

[Can't apply now, but will tonight if no one beats me to it.]

On Thu, May 3, 2018 at 1:02 PM, Rhys Perry  wrote:
> Signed-off-by: Rhys Perry 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> index ab39f9fdf6..cbb21f5f72 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> @@ -217,7 +217,7 @@ static const char *shflOpStr[] =
>
>  static const char *pixldOpStr[] =
>  {
> -   "count", "covmask", "offset", "cent_offset", "sampleid"
> +   "count", "covmask", "covered", "offset", "cent_offset", "sampleid"
>  };
>
>  static const char *rcprsqOpStr[] =
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: fix printing of pixld

2018-05-03 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index ab39f9fdf6..cbb21f5f72 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -217,7 +217,7 @@ static const char *shflOpStr[] =
 
 static const char *pixldOpStr[] =
 {
-   "count", "covmask", "offset", "cent_offset", "sampleid"
+   "count", "covmask", "covered", "offset", "cent_offset", "sampleid"
 };
 
 static const char *rcprsqOpStr[] =
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa 18.0 #7614 failed

2018-05-03 Thread AppVeyor



Build mesa 7614 failed


Commit 5d3caa1ca4 by Boyuan Zhang on 4/25/2018 3:49 PM:

radeon/vcn: fix mpeg4 msg buffer settings\n\nPrevious bit-fields assignments are incorrect and will result certain mpeg4\ndecode failed due to wrong flag values. This patch fixes these assignments.\n\nSigned-off-by: Boyuan Zhang \nReviewed-by: Leo Liu \n(cherry picked from commit deba56accf4e1f8fc025f34f6cbc069285f76838)


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa/st/cb_clear: in st_Clear also validate the render state (needed by virgl)

2018-05-03 Thread Ilia Mirkin
On Thu, May 3, 2018 at 12:29 PM, Gert Wollny  wrote:
> Am Donnerstag, den 03.05.2018, 11:43 -0400 schrieb Ilia Mirkin:
>> You're supposed to keep track of the bound state (usually in the
>> context). After your clear() implementation is done, you have to undo
>> all the state you've messed up on your "hardware" (or mark it dirty
>> for revalidation).
>
> Thanks for you answer, but I think I didn't explain the issue properly.
>
>
> The problem is not the state after the clear(), it is what is happening
> before.  To explain the specific case GL_RASTERIZER_DISCARD.
>
> In the tests I mentioned you see following for the standalone test
>
> - setup contest and buffers
> - glClear(...)
> - glEnable(GL_RASTERIZER_DISCARD)
> - ...
> - glDrawArrays(...)
> - glDisable(GL_RASTERIZER_DISCARD)
> - check result
> - Tear down buffers and context
>
> On a normal host (r600 in my case) I see state updates before clear
> (initial setup) and a state update initialized by glDrawArrays.
> The state update related to the glDisable is not really transmitted to
> the hardware since it is not needed.
>
> On virgl the first state update isn't send to the host, but since the
> test is run stand-alone, and GL_RASTERIZER_DISCARD is disabled by
> default all is fine, i.e. clear() works as expected and the glDisable
> is of no consequence.
>
> Now, if you run the test suite batched, you get
>
> - setup contest and buffers
> - FOREACH rasterizer_discard_test
> -   glClear(...)
> -   glEnable(GL_RASTERIZER_DISCARD)
> -   ...
> -   glDrawArrays(...)
> -   glDisable(GL_RASTERIZER_DISCARD)
> -   check result
> - ENDFOR
> - Tear down buffers and context
>
> Here, at each beginning of the loop the "normal" host sends a state
> update to the hardware before clear is actually executed, because the
> blitter validates the render state. Hence
> glDisable(GL_RASTERIZER_DISCARD) goes into effect in consecutive runs
> and the clear() operation actually clears the buffers.
>
> On virgl the current code path for virgl_clear doesn't validate the
> render state, i.e. it doesn't do a blit like drivers that directly talk
> to the hardware and that validates the render state before the actual
> drawing happens. Virgl it just sends the command indicating clear from
> the guest running in Qemu to the host, without validating the state
> first. Hence the host doesn't update the hardware state, because it
> doesn't know that glDisable(GL_RASTERIZER_DISCARD) was called in the
> guest, and the blit, that will eventually be executed on the host is
> still executed as if GL_RASTERIZER_DISCARD were enabled.

virgl_clear implements a gallium API call which is meant to clear the
surface. It sounds like virglrenderer does not properly implement that
call. Either workaround your buggy "hardware" (i.e. virglrenderer) by
throwing in an explicit "no, really, do this" call, or fix
virglrenderer to do the clear irrespective of what the last-set
rasterizer discard state might be. I'd recommend the latter (i.e.
fixing this in virglrenderer).

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [ANNOUNCE] Mesa 18.0.3 release candidate

2018-05-03 Thread Juan A. Suarez Romero
Hello list,

The candidate for the Mesa 18.0.3 is now available. Currently we have:
 - 9 queued
 - 6 nominated (outstanding)
 - and 0 rejected patches

The is a fairly short queue consisting of patches to fix leaks in RADV and 
Winsys,
fix deadlock in internal queue, fix issues with ANV allocator, fix blit setup 
for
YUV LoadImage, and some other patches.

Take a look at section "Mesa stable queue" for more information.


Testing reports/general approval

Any testing reports (or general approval of the state of the branch) will be
greatly appreciated.

The plan is to have 18.0.3 this Monday (07th May), around or shortly after 10:00
GMT.

If you have any questions or suggestions - be that about the current patch queue
or otherwise, please go ahead.


Trivial merge conflicts
---
commit 1a23971b49f77960afb2c686d20fced476185602
Author: Leo Liu 

st/omx/enc: fix blit setup for YUV LoadImage

(cherry picked from commit 1c5f4f4e17f74d823d9e38c678e40e9f49e2c053)


Cheers,

J.A.


Mesa stable queue
-

Nominated (6)
==

Bas Nieuwenhuizen(2):
   9267ff9883 radv: Allow vkEnumerateInstanceVersion ProcAddr without 
instance.
   467c562a29 radv: Don't check the incoming apiVersion on CreateInstance.

Deepak Rawat (1):
   9a21c96126 egl/x11: Send invalidate to driver on copy_region path in 
swap_buffer

Jose Maria Casanova Crespo (2):
   2a76f03c90 intel/compiler: fix 16-bit int brw_negate_immediate and 
brw_abs_immediate
   f0e6dacee5 intel/compiler: fix brw_imm_w for negative 16-bit integers

Matthew Nicholls(1):
   97d57ef917 radv: fix multisample image copies


Queued (9)
===

Andres Rodriguez (1):
  radv/winsys: fix leaking resources from bo's imported by fd

Boyuan Zhang (1):
  radeon/vcn: fix mpeg4 msg buffer settings

Eric Anholt (1):
  gallium/util: Fix incorrect refcounting of separate stencil.

Jason Ekstrand (1):
  anv/allocator: Don't shrink either end of the block pool

Leo Liu (1):
  st/omx/enc: fix blit setup for YUV LoadImage

Marek Olšák (2):
  util/u_queue: fix a deadlock in util_queue_finish
  radeonsi/gfx9: workaround for INTERP with indirect indexing

Nanley Chery (1):
  i965/tex_image: Avoid the ASTC LDR workaround on gen9lp

Samuel Pitoiset (1):
  radv: compute the number of subpass attachments correctly


Rejected (0)
=


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa/st/cb_clear: in st_Clear also validate the render state (needed by virgl)

2018-05-03 Thread Gert Wollny
Am Donnerstag, den 03.05.2018, 11:43 -0400 schrieb Ilia Mirkin:
> You're supposed to keep track of the bound state (usually in the
> context). After your clear() implementation is done, you have to undo
> all the state you've messed up on your "hardware" (or mark it dirty
> for revalidation).

Thanks for you answer, but I think I didn't explain the issue properly.
 

The problem is not the state after the clear(), it is what is happening
before.  To explain the specific case GL_RASTERIZER_DISCARD.

In the tests I mentioned you see following for the standalone test 

- setup contest and buffers 
- glClear(...)
- glEnable(GL_RASTERIZER_DISCARD)
- ... 
- glDrawArrays(...) 
- glDisable(GL_RASTERIZER_DISCARD) 
- check result 
- Tear down buffers and context 

On a normal host (r600 in my case) I see state updates before clear
(initial setup) and a state update initialized by glDrawArrays. 
The state update related to the glDisable is not really transmitted to
the hardware since it is not needed. 

On virgl the first state update isn't send to the host, but since the
test is run stand-alone, and GL_RASTERIZER_DISCARD is disabled by
default all is fine, i.e. clear() works as expected and the glDisable
is of no consequence. 

Now, if you run the test suite batched, you get 

- setup contest and buffers 
- FOREACH rasterizer_discard_test
-   glClear(...)
-   glEnable(GL_RASTERIZER_DISCARD)
-   ... 
-   glDrawArrays(...) 
-   glDisable(GL_RASTERIZER_DISCARD) 
-   check result
- ENDFOR
- Tear down buffers and context 

Here, at each beginning of the loop the "normal" host sends a state
update to the hardware before clear is actually executed, because the
blitter validates the render state. Hence
glDisable(GL_RASTERIZER_DISCARD) goes into effect in consecutive runs
and the clear() operation actually clears the buffers.

On virgl the current code path for virgl_clear doesn't validate the
render state, i.e. it doesn't do a blit like drivers that directly talk
to the hardware and that validates the render state before the actual
drawing happens. Virgl it just sends the command indicating clear from
the guest running in Qemu to the host, without validating the state
first. Hence the host doesn't update the hardware state, because it 
doesn't know that glDisable(GL_RASTERIZER_DISCARD) was called in the
guest, and the blit, that will eventually be executed on the host is
still executed as if GL_RASTERIZER_DISCARD were enabled. 

Now, calling st_validate_state(st, ST_PIPELINE_RENDER) before
virgl_clear is called solves the problem, but I see that this might not
be thebest solution, because then all drivers that actually use the
blitter will validate the render state a second time without need. 

Best, 
Gert 



> 
> See for example
> https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouve
> au/nvc0/nvc0_surface.c#n1122
> which is how a bunch of state is restored after a blit which uses the
> 3d engine.
> 
> Cheers,
> 
>   -ilia
> 
> 
> On Thu, May 3, 2018 at 11:24 AM, Gert Wollny  om> wrote:
> > A number of  CTS tests from the dEQP-
> > GLES3.functional.rasterizer_discard.* subset
> > fail when the tests are run in a batch, because the
> > GL_RASTERIZER_DISCARD state
> > is enabled after running glClear and the disabled state issed after
> > a number of draw
> > commands is not properly send to the host.
> > 
> > This happens because in virgl the render state is not validated
> > (and updated on the host)
> > when clear is run. With this patch  the render state is explicitely
> > updated in st_Clear
> > thereby fixing these failures.
> > 
> > Signed-off-by: Gert Wollny 
> > 
> > ---
> > The reason why I send this as a RFC is that I'm well aware of the
> > fact that the drivers
> > that use the blitter to execute a clear do already implicitely call
> > st_validate_state for
> > ST_PIPELINE_RENDER because the blitter does so.  Unfortunetely, I
> > haven't found yet how I
> > can issue this call from virgl_clear, because I don't see how I can
> > access the rasterer
> > state from there, but maybe someone has a pointer for me?
> > 
> > Many thanks,
> > Gert
> > 
> > PS: I don't have mesa-git commit rights.
> > 
> > ---
> >  src/mesa/state_tracker/st_cb_clear.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/src/mesa/state_tracker/st_cb_clear.c
> > b/src/mesa/state_tracker/st_cb_clear.c
> > index fbc577a370..8f8295ea6c 100644
> > --- a/src/mesa/state_tracker/st_cb_clear.c
> > +++ b/src/mesa/state_tracker/st_cb_clear.c
> > @@ -377,6 +377,7 @@ st_Clear(struct gl_context *ctx, GLbitfield
> > mask)
> > 
> > /* This makes sure the pipe has the latest scissor, etc values
> > */
> > st_validate_state(st, ST_PIPELINE_CLEAR);
> > +   st_validate_state(st, ST_PIPELINE_RENDER);
> > 
> > 
> > if (mask & BUFFER_BITS_COLOR) {
> >for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
> > {
> > --
> > 2.16.1
> > 
> > 

[Mesa-dev] [PATCH 1/2] egl: declare dri2_glFlush using a macro

2018-05-03 Thread Tapani Pälli
This is done to avoid having same code for multiple
entrypoints, next patch wants to utilize glFinish.

Signed-off-by: Tapani Pälli 
---
 src/egl/drivers/dri2/egl_dri2.c | 44 +
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 45d0c7275c..de56c16221 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -104,25 +104,27 @@ dri_set_background_context(void *loaderPrivate)
_eglBindContextToThread(ctx, t);
 }
 
-static void
-dri2_gl_flush()
-{
-   static void (*glFlush)(void);
-   static mtx_t glFlushMutex = _MTX_INITIALIZER_NP;
-
-   mtx_lock();
-   if (!glFlush)
-  glFlush = _glapi_get_proc_address("glFlush");
-   mtx_unlock();
-
-   /* if glFlush is not available things are horribly broken */
-   if (!glFlush) {
-  _eglLog(_EGL_WARNING, "DRI2: failed to find glFlush entry point");
-  return;
-   }
-
-   glFlush();
-}
+typedef void (*funcp) (void);
+
+#define DECL_GLCALL(x) \
+static void dri2_##x() \
+{\
+   static funcp func##x = NULL;\
+   static mtx_t mutex = _MTX_INITIALIZER_NP;\
+   mtx_lock();\
+   if (!func##x)\
+  func##x = _glapi_get_proc_address(#x);\
+   mtx_unlock();\
+   if (!func##x) {\
+  _eglLog(_EGL_WARNING, "DRI2: failed to find " #x "entry point");\
+  return;\
+   }\
+   func##x();\
+}\
+
+DECL_GLCALL(glFlush);
+
+#undef DECL_GLCALL
 
 static GLboolean
 dri_is_thread_safe(void *loaderPrivate)
@@ -1511,7 +1513,7 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, 
_EGLSurface *dsurf,
 
/* flush before context switch */
if (old_ctx)
-  dri2_gl_flush();
+  dri2_glFlush();
 
ddraw = (dsurf) ? dri2_dpy->vtbl->get_dri_drawable(dsurf) : NULL;
rdraw = (rsurf) ? dri2_dpy->vtbl->get_dri_drawable(rsurf) : NULL;
@@ -3081,7 +3083,7 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, 
_EGLSync *sync,
   if (dri2_ctx && dri2_sync->base.SyncStatus == EGL_UNSIGNALED_KHR &&
   (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)) {
  /* flush context if EGL_SYNC_FLUSH_COMMANDS_BIT_KHR is set */
- dri2_gl_flush();
+ dri2_glFlush();
   }
 
   /* if timeout is EGL_FOREVER_KHR, it should wait without any timeout.*/
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] egl: make eglWaitClient behave like glFinish

2018-05-03 Thread Tapani Pälli
As defined by the spec:
   "All rendering calls for the currently bound context, for
   the current rendering API, made prior to eglWaitClient, are
   guaranteed to be executed before native rendering calls made
   after eglWaitClient which affect the read or draw surfaces
   associated with that context.

   The same result can be achieved using client API-specific calls
   such as glFinish or vgFinish."

v2: call glFinish() to ensure identical behaviour

Signed-off-by: Tapani Pälli 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106337
---
 src/egl/drivers/dri2/egl_dri2.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index de56c16221..5cf366a04c 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -123,6 +123,7 @@ static void dri2_##x() \
 }\
 
 DECL_GLCALL(glFlush);
+DECL_GLCALL(glFinish);
 
 #undef DECL_GLCALL
 
@@ -1723,17 +1724,14 @@ dri2_query_buffer_age(_EGLDriver *drv, _EGLDisplay 
*dpy, _EGLSurface *surf)
 static EGLBoolean
 dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
 {
-   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   _EGLSurface *surf = ctx->DrawSurface;
-   __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf);
-
(void) drv;
+   (void) disp;
+   (void) ctx;
 
/* FIXME: If EGL allows frontbuffer rendering for window surfaces,
 * we need to copy fake to real here.*/
 
-   if (dri2_dpy->flush != NULL)
-  dri2_dpy->flush->flush(dri_drawable);
+   dri2_glFinish();
 
return EGL_TRUE;
 }
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa/st/cb_clear: in st_Clear also validate the render state (needed by virgl)

2018-05-03 Thread Ilia Mirkin
You're supposed to keep track of the bound state (usually in the
context). After your clear() implementation is done, you have to undo
all the state you've messed up on your "hardware" (or mark it dirty
for revalidation).

See for example
https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c#n1122
which is how a bunch of state is restored after a blit which uses the
3d engine.

Cheers,

  -ilia


On Thu, May 3, 2018 at 11:24 AM, Gert Wollny  wrote:
> A number of  CTS tests from the dEQP-GLES3.functional.rasterizer_discard.* 
> subset
> fail when the tests are run in a batch, because the GL_RASTERIZER_DISCARD 
> state
> is enabled after running glClear and the disabled state issed after a number 
> of draw
> commands is not properly send to the host.
>
> This happens because in virgl the render state is not validated (and updated 
> on the host)
> when clear is run. With this patch  the render state is explicitely updated 
> in st_Clear
> thereby fixing these failures.
>
> Signed-off-by: Gert Wollny 
>
> ---
> The reason why I send this as a RFC is that I'm well aware of the fact that 
> the drivers
> that use the blitter to execute a clear do already implicitely call 
> st_validate_state for
> ST_PIPELINE_RENDER because the blitter does so.  Unfortunetely, I haven't 
> found yet how I
> can issue this call from virgl_clear, because I don't see how I can access 
> the rasterer
> state from there, but maybe someone has a pointer for me?
>
> Many thanks,
> Gert
>
> PS: I don't have mesa-git commit rights.
>
> ---
>  src/mesa/state_tracker/st_cb_clear.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/mesa/state_tracker/st_cb_clear.c 
> b/src/mesa/state_tracker/st_cb_clear.c
> index fbc577a370..8f8295ea6c 100644
> --- a/src/mesa/state_tracker/st_cb_clear.c
> +++ b/src/mesa/state_tracker/st_cb_clear.c
> @@ -377,6 +377,7 @@ st_Clear(struct gl_context *ctx, GLbitfield mask)
>
> /* This makes sure the pipe has the latest scissor, etc values */
> st_validate_state(st, ST_PIPELINE_CLEAR);
> +   st_validate_state(st, ST_PIPELINE_RENDER);
>
> if (mask & BUFFER_BITS_COLOR) {
>for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
> --
> 2.16.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH] mesa/st/cb_clear: in st_Clear also validate the render state (needed by virgl)

2018-05-03 Thread Gert Wollny
A number of  CTS tests from the dEQP-GLES3.functional.rasterizer_discard.* 
subset
fail when the tests are run in a batch, because the GL_RASTERIZER_DISCARD state
is enabled after running glClear and the disabled state issed after a number of 
draw
commands is not properly send to the host.

This happens because in virgl the render state is not validated (and updated on 
the host)
when clear is run. With this patch  the render state is explicitely updated in 
st_Clear
thereby fixing these failures.

Signed-off-by: Gert Wollny 

---
The reason why I send this as a RFC is that I'm well aware of the fact that the 
drivers
that use the blitter to execute a clear do already implicitely call 
st_validate_state for
ST_PIPELINE_RENDER because the blitter does so.  Unfortunetely, I haven't found 
yet how I
can issue this call from virgl_clear, because I don't see how I can access the 
rasterer
state from there, but maybe someone has a pointer for me?

Many thanks,
Gert

PS: I don't have mesa-git commit rights.

---
 src/mesa/state_tracker/st_cb_clear.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_cb_clear.c 
b/src/mesa/state_tracker/st_cb_clear.c
index fbc577a370..8f8295ea6c 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -377,6 +377,7 @@ st_Clear(struct gl_context *ctx, GLbitfield mask)
 
/* This makes sure the pipe has the latest scissor, etc values */
st_validate_state(st, ST_PIPELINE_CLEAR);
+   st_validate_state(st, ST_PIPELINE_RENDER);
 
if (mask & BUFFER_BITS_COLOR) {
   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] amd/common: use llvm.amdgcn.wqm for explicit derivatives

2018-05-03 Thread Nicolai Hähnle
From: Nicolai Hähnle 

To comply with an upcoming change in LLVM, see
https://reviews.llvm.org/D46051
---
 src/amd/common/ac_llvm_build.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index f21a5d2623c..c9b2e36b632 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1241,20 +1241,27 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
trbl = ac_build_intrinsic(ctx,
"llvm.amdgcn.ds.swizzle", ctx->i32,
args, 2,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
}
 
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
+
+   if (HAVE_LLVM >= 0x0700) {
+   result = ac_build_intrinsic(ctx,
+   "llvm.amdgcn.wqm.f32", ctx->f32,
+   , 1, 0);
+   }
+
return result;
 }
 
 void
 ac_build_sendmsg(struct ac_llvm_context *ctx,
 uint32_t msg,
 LLVMValueRef wave_id)
 {
LLVMValueRef args[2];
args[0] = LLVMConstInt(ctx->i32, msg, false);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106337] eglWaitClient() doesn't work as documented using DRI2 backend

2018-05-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106337

--- Comment #11 from Mike Gorchak  ---
> Yes that is a separate issue, please file another bug about that one.

Done! Bug 106377.

Thank you!

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106377] eglWaitClient() doesn't work for surfaceless contexts

2018-05-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106377

Bug ID: 106377
   Summary: eglWaitClient() doesn't work for surfaceless contexts
   Product: Mesa
   Version: 18.0
  Hardware: All
OS: All
Status: NEW
  Severity: major
  Priority: medium
 Component: EGL
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: mgorc...@qnx.com
QA Contact: mesa-dev@lists.freedesktop.org

When surfaceless contexts are in use (for example, for FBO rendering),
eglWaitClient return an error EGL_BAD_CURRENT_SURFACE. According to spec it
should do glFinish()-like behavior disregarding if it has bound surface to the
context or not.

In case of using DRI2 backend it happens that in case of surfaceless context
dri2_wait_client() is not called at all and aborted at _eglWaitClientCommon()
function with following check:

   /* let bad current context imply bad current surface */
   if (_eglGetContextHandle(ctx) == EGL_NO_CONTEXT ||
   _eglGetSurfaceHandle(ctx->DrawSurface) == EGL_NO_SURFACE)
  RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106337] eglWaitClient() doesn't work as documented using DRI2 backend

2018-05-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106337

--- Comment #10 from Tapani Pälli  ---
(In reply to Mike Gorchak from comment #9)
> Just checked generic path - all works fine, here is very minor fix to the
> provided fix:
> 
> +  _eglLog(_EGL_WARNING, "DRI2: failed to find glFlush entry point");
> 
> should be changed glFlush to glFinish :)

Thanks, I'll send this one to the list.

> Am I understand right that this fix is not supposed to fix FBOs +
> surfaceless context issue? It happens that in case of surfaceless context
> dri2_wait_client() is not called at all and aborted at
> _eglWaitClientCommon() function with following check:
> 
>/* let bad current context imply bad current surface */
>if (_eglGetContextHandle(ctx) == EGL_NO_CONTEXT ||
>_eglGetSurfaceHandle(ctx->DrawSurface) == EGL_NO_SURFACE)
>   RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);

Yes that is a separate issue, please file another bug about that one.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106337] eglWaitClient() doesn't work as documented using DRI2 backend

2018-05-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106337

--- Comment #9 from Mike Gorchak  ---
Just checked generic path - all works fine, here is very minor fix to the
provided fix:

+  _eglLog(_EGL_WARNING, "DRI2: failed to find glFlush entry point");

should be changed glFlush to glFinish :)

Am I understand right that this fix is not supposed to fix FBOs + surfaceless
context issue? It happens that in case of surfaceless context
dri2_wait_client() is not called at all and aborted at _eglWaitClientCommon()
function with following check:

   /* let bad current context imply bad current surface */
   if (_eglGetContextHandle(ctx) == EGL_NO_CONTEXT ||
   _eglGetSurfaceHandle(ctx->DrawSurface) == EGL_NO_SURFACE)
  RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/22] i965: Setup glsl uniforms by index rather than name matching

2018-05-03 Thread Neil Roberts
Thanks for the feedback.

I’ve implemented your suggestion as the top two patches on a branch
here:

https://github.com/Igalia/mesa/tree/nroberts/count-uniform-storage

It also fixes a slight bug that it wasn’t passing down the
uniform_storage_locs argument into the recursion.

However I think my preference would be for keeping this as a separate
function because it seems a little confusing to make the function count two
different things depending on a bool argument. Perhaps for the sake of
maintainability it would be good to move it to glsl_types though.

If there is a strong preference to merge it into one function then of
course I don’t mind.

Regards,
- Neil

Timothy Arceri  writes:

> On 18/04/18 00:36, Alejandro Piñeiro wrote:
>> From: Neil Roberts 
>> 
>> Previously when setting up a uniform it would try to walk the uniform
>> storage slots and find one that matches the name of the given
>> variable. However, each variable already has a location which is an
>> index into the UniformStorage array so we can just directly jump to
>> the right slot. Some of the variables take up more than one slot so we
>> still need to calculate how many it uses.
>> 
>> The main reason to do this is to support ARB_gl_spirv because in that
>> case the uniforms don’t have names so the previous approach won’t
>> work.
>
> This is a nice improvement away :)
>
> However it might be nicer to just update the existing 
> glsl_type::uniform_locations() helper
> rather than create the custom count_uniform_storage_slots() helper.
>
> e.g
>
> unsigned
> glsl_type::uniform_locations(bool uniform_storage_locs) const
> {
> unsigned size = 0;
>
> switch (this->base_type) {
> case GLSL_TYPE_UINT:
> case GLSL_TYPE_INT:
> case GLSL_TYPE_FLOAT:
> case GLSL_TYPE_FLOAT16:
> case GLSL_TYPE_DOUBLE:
> case GLSL_TYPE_UINT16:
> case GLSL_TYPE_UINT8:
> case GLSL_TYPE_INT16:
> case GLSL_TYPE_INT8:
> case GLSL_TYPE_UINT64:
> case GLSL_TYPE_INT64:
> case GLSL_TYPE_BOOL:
> case GLSL_TYPE_SAMPLER:
> case GLSL_TYPE_IMAGE:
> case GLSL_TYPE_SUBROUTINE:
>return 1;
>
> case GLSL_TYPE_STRUCT:
> case GLSL_TYPE_INTERFACE:
>for (unsigned i = 0; i < this->length; i++)
>   size += this->fields.structure[i].type->uniform_locations();
>return size;
> case GLSL_TYPE_ARRAY:
>if (!uniform_storage_locs ||
>this->fields.array->is_array() ||
>this->fields.array->is_interface() ||
>this->fields.array->is_record()) {
>/* For uniform locations passed to the user via the API we
> * count all array elements.
> */
>   return this->length * this->fields.array->uniform_locations();
>} else {
>   /* For uniform storage the innermost array only uses a
>* single slot.
>*/
>   return 1;
>}
> default:
>return 0;
> }
> }
>
> If you agree this patch is:
>
> Reviewed-by: Timothy Arceri 
>
>> ---
>>   src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 50 
>> +++---
>>   1 file changed, 37 insertions(+), 13 deletions(-)
>> 
>> diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 
>> b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
>> index 62b2951432a..cb5e07f74ba 100644
>> --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
>> @@ -118,35 +118,59 @@ brw_setup_image_uniform_values(gl_shader_stage stage,
>>  }
>>   }
>>   
>> +static unsigned
>> +count_uniform_storage_slots(const struct glsl_type *type)
>> +{
>> +   /* gl_uniform_storage can cope with one level of array, so if the
>> +* type is a composite type or an array where each element occupies
>> +* more than one slot than we need to recursively process it.
>> +*/
>> +   if (glsl_type_is_struct(type)) {
>> +  unsigned location_count = 0;
>> +
>> +  for (unsigned i = 0; i < glsl_get_length(type); i++) {
>> + const struct glsl_type *field_type = glsl_get_struct_field(type, 
>> i);
>> +
>> + location_count += count_uniform_storage_slots(field_type);
>> +  }
>> +
>> +  return location_count;
>> +   }
>> +
>> +   if (glsl_type_is_array(type)) {
>> +  const struct glsl_type *element_type = glsl_get_array_element(type);
>> +
>> +  if (glsl_type_is_array(element_type) ||
>> +  glsl_type_is_struct(element_type)) {
>> + unsigned element_count = count_uniform_storage_slots(element_type);
>> + return element_count * glsl_get_length(type);
>> +  }
>> +   }
>> +
>> +   return 1;
>> +}
>> +
>>   static void
>>   brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
>>  const struct gl_program *prog,
>>  struct brw_stage_prog_data *stage_prog_data,
>>  

[Mesa-dev] [Bug 104836] Missing library link breaks mesa on Debian/ia64

2018-05-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104836

--- Comment #4 from Jason Duerstock  ---
Any updates on this?  Is there a reason my proposed patch can't be accepted?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/4] improve buffer cache and reuse

2018-05-03 Thread Eero Tamminen

Hi,

On 02.05.2018 21:19, James Xiong wrote:

On Wed, 2 May 2018 14:18:21 +0300
Eero Tamminen  wrote:

[...]

You're missing information on:
* On which plaform you did the testing (affects variance)
* how many test rounds you ran, and
* what is your variance

>

I ran these tests on a gen9 platform/ubuntu 17.10 LTS.


If it's TDP limited in 3D tests (like all NUC and e.g. Broxton devices
seem to be in long running tests), it has clearly higher variance than
non-TDP (or temperature) limited desktop platforms.



Most of the tests
are consistent, especially the memory usage. The only exception is
GfxBench 4.0 gl_manhattan, I had to ran it 3 times and pick the highest
one. I will apply this method to all tests and re-send with updated
results.


(comments below are about FPS results, not memory usage.)

Performance of many GPU bound tests doesn't have normal Gaussian
distribution, but two (tight) peaks.  On our BXT machines these peaks
are currently e.g. in GfxBench Manhattan tests *3%* apart from each
other.

While you can get results from both performance peaks, whether your 
results fall onto either of these performance peaks, is more likely

to change between boots (I think due to alignment changes in kernel
memory allocations), than successive runs.
-> Your results may have less chance of being misleading, if you
   don't reboot when switching between Mesa version with your patch
   and one without.

Especially if you're running tests only on one machine (i.e. don't have
extra data from other machines against which you can correlate results),
I think you need more than 3 runs, both with and  without your patch.

While max() can provide better comparison for this kind of bimodal
result distribution than avg(), you should still calculate and provide
variance for your data with your patches.


- Eero
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [ANNOUNCE] mesa 18.1.0-rc2

2018-05-03 Thread Emil Velikov
Hi Dylan,

A few small nitpicks:

On 27 April 2018 at 22:07, Dylan Baker  wrote:
> Hi List,
>
s/List/list/

> Mesa 18.1.0-rc2 is now available. There are 20 nominated patches, and no 
> queued
> or rejected patches. All patches applied cleanly, so no conflicts at all. Yay.
>
Please follow the existing template or suggest improvements if
something feels unusual.

Here - keep queued/nominated/rejected summary numbers on separate lines.
It cuts parse time to 0.5s ;-)

>
> git tag: mesa-18.1.0-rc2
>
This should be just above the tarballs

>
> Nominated (20)
> ==
>

> Jason Ekstrand (2):
>
>   • i965/fs: Return mlen Android.common.mk Android.mk appveyor.yml autogen.sh
> bin build-support changes.html CleanSpec.mk common.py configure.ac docs
> doxygen include install-gallium-links.mk install-lib-links.mk m4
> Makefile.am mesa-18.1.0-devel meson.build meson_options.txt REVIEWERS 
> scons
> SConstruct scripts src subprojects VERSION 8 for size_read() for
> INTERPOLATE_AT_*
Seems like you're using some homegrown script for the patch list.

You want to use git shortlog instead - it provides a consistent
experience and doesn't have the bugs hit above.
Don't forget to send patches for the releasing documentation. Be that
removing unnecessary fluff or adding missing bits of info.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] i965: Fix ETC2/EAC GetCompressed* functions on Gen7 GPUs

2018-05-03 Thread Eero Tamminen

Hi,

On 02.05.2018 20:19, Matt Turner wrote:

On Wed, May 2, 2018 at 9:13 AM, Eleni Maria Stea  wrote:

Gen 7 GPUs store the compressed EAC/ETC2 images in other non-compressed
formats that can render. When GetCompressed* functions are called, the
pixels are returned in the non-compressed format that is used for the
rendering.

With this patch we store both the compressed and non-compressed versions
of the image, so that both rendering commands and GetCompressed*
commands work.

Also, the assertions for GL_MAP_WRITE_BIT and GL_MAP_INVALIDATE_RANGE_BIT
in intel_miptree_map_etc function have been removed because when the
miptree is mapped for reading (for example from a GetCompress*
function) the GL_MAP_WRITE_BIT won't be set (and shouldn't be set).

Fixes: the following test in CTS for gen7:
KHR-GL45.direct_state_access.textures_compressed_subimage test

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104272


I think you can add

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81843

as well :)


This is really lovely feature.

Compressed texture formats are used to:
1. Reduce disk / network usage for the application install
2. Increase run-time performance (by reducing required bandwidth)
3. Reduce program memory usage

At the cost of worse texture quality.

Mesa transparently converting ETC to uncompressed data on platforms that
don't support ETC in HW, means that application doesn't get 2), just
worse texture quality, although some applications would have capability
to fall back to another (HW supported) texture compression format.

And this new patch means that instead of 3), memory usage actually
_increases_ compared to application using non-compressed textures
directly.


Some (many?) applications might fail to run if ETC isn't supported, so
I  understand why this feature is done, but it would be nice to have
some better way to handle it.

Maybe some new extension that can be used by future game engines &
application toolkits to query which of the compressed texture formats
are faked, so that they can instead select a compression format that
actually provides run-time benefits?


- Eero
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] mesa: actually support GLSL version overrides in compat profile

2018-05-03 Thread Emil Velikov
On 3 May 2018 at 00:10, Timothy Arceri  wrote:
>
>
> On 03/05/18 02:51, Emil Velikov wrote:
>>
>> On 2 May 2018 at 11:27, Timothy Arceri  wrote:
>>>
>>> ---
>>>   src/mesa/main/version.c | 2 ++
>>>   1 file changed, 2 insertions(+)
>>>
>>> diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
>>> index 84babd69e2f..540f5482034 100644
>>> --- a/src/mesa/main/version.c
>>> +++ b/src/mesa/main/version.c
>>> @@ -591,6 +591,8 @@ _mesa_get_version(const struct gl_extensions
>>> *extensions,
>>>if (consts->GLSLVersion > 140) {
>>>   consts->GLSLVersion = 140;
>>>}
>>> + /* Support GLSL version overrides in compat profile */
>>> + _mesa_override_glsl_version(consts);
>>
>>
>> Why are we allowing this only for compat? As-is this feels very dirty
>> and skimming through the existing code doesn't help much.
>
>
> Because the code above this hard-codes compat to glsl 140. We have no
> consts->GLSLVersionCompat (and probably shouldn't) so drivers will otherwise
> set what ever they support in core to compat. This is how the code currently
> works and this patch allows the override to work as expected.
>
>>
>> * classic drivers - 965, starting at create_context
>> _mesa_initialize_context -> _mesa_init_constants -> GLSLVersion (120) +
>> override
>> intelInitExtensions -> GLSLVersion + override combo
>> _mesa_compute_version -> _mesa_get_version -> [optional] cap up-to 140
>> -> override
>> _mesa_compute_version -> tweak/match GLSL version based on the GL version
>>
>> Not to mention the initial 120 (effectively) in
>> _mesa_initialize_context is bonkers for the following:
>>   - Intel Gen2 (GL 1.3) and Gen3 (GL 1.4 or 2.1)
>>   - nouveau vieux - GL 1.2 or 1.3
>>   - radeon (r100/r200) - GL 1.3
>>
>> * gallium - two paths - create_screen and create_context, latter more
>> or less identical to i965
>> For the create_screen part:
>> st_api_query_versions (for max_gl*_version) -> _mesa_init_constants -> see
>> above
>> st_api_query_versions (for max_gl*_version) -> st_init_extensions ->
>> GLSLVersion + override combo
>> st_api_query_versions (for max_gl*_version) -> _mesa_get_version -> see
>> above
>>
>> As you can see things are hairy.
>>
>> A few ideas that come to mind:
>>   - drop the _mesa_init_constants bits and update any drivers needed
>>   - each of GLSLVersion, override and tweaks should happen [ideally]
>> once per ctx.
>
>
> I have no interest in tackling this right now. I disagree that this change
> requires such a big overhaul. Right now this fixes a bug, we can tidy up
> later. We are likely going to need to tweak how versioning works once 3.2
> compat profiles become supported.
>
The gallium suggestion is the big overhaul, the couple above are dead trivial.
Most of the work is tracking the lovely codepaths.

That said, I'm fine with this if a) some of the 'why' goes into the
commit message and b) things are fixed before/as we get GL 3.2 compat.
Just let's agree that env. overrides and drirc workarounds are mere hacks.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106187] Vulkan apps run on secondary GPU on multi-GPU system

2018-05-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106187

--- Comment #10 from Christian König  ---
Just some technical feedback.

(In reply to Bas Nieuwenhuizen from comment #9)
> 1) Running on a GPU that is not connected to the display is slower. Arguably
> we should be able to optimize this a bit, especially if the two cards are
> identical, but there is always some extra bus traffic that is going to
> happen (and not sure if card B can directly scanout from memory in card A).

At least with dedicated AMD GPUs that isn't possible, but for APUs we recently
enabled scanout from system memory.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 7.5/18] intel/compiler: support negate and abs of half float immediates

2018-05-03 Thread Iago Toral
On Thu, 2018-05-03 at 08:39 +0200, Iago Toral wrote:
> On Wed, 2018-05-02 at 17:57 -0700, Jason Ekstrand wrote:
> > Reviewed-by: Jason Ekstrand 
> > 
> > Have I reviewed everything?  Can we land shaderInt16 now?
> 
> Yes, all patches are reviewed now, thanks Jason.
> I'll send the final set of patches to Jenkins one last time and push
> them today  if we don't see any unexpected results.

I have just pushed the patches to master.
Iago
> Iago
> > --Jason
> > 
> > On Wed, May 2, 2018 at 5:18 PM, Jose Maria Casanova Crespo  > o...@igalia.com> wrote:
> > > ---
> > > 
> > >  src/intel/compiler/brw_shader.cpp | 6 --
> > > 
> > >  1 file changed, 4 insertions(+), 2 deletions(-)
> > > 
> > > 
> > > 
> > > diff --git a/src/intel/compiler/brw_shader.cpp
> > > b/src/intel/compiler/brw_shader.cpp
> > > 
> > > index 284c2e8233c..537defd05d9 100644
> > > 
> > > --- a/src/intel/compiler/brw_shader.cpp
> > > 
> > > +++ b/src/intel/compiler/brw_shader.cpp
> > > 
> > > @@ -605,7 +605,8 @@ brw_negate_immediate(enum brw_reg_type type,
> > > struct brw_reg *reg)
> > > 
> > > case BRW_REGISTER_TYPE_V:
> > > 
> > >assert(!"unimplemented: negate UV/V immediate");
> > > 
> > > case BRW_REGISTER_TYPE_HF:
> > > 
> > > -  assert(!"unimplemented: negate HF immediate");
> > > 
> > > +  reg->ud ^= 0x80008000;
> > > 
> > > +  return true;
> > > 
> > > case BRW_REGISTER_TYPE_NF:
> > > 
> > >unreachable("no NF immediates");
> > > 
> > > }
> > > 
> > > @@ -651,7 +652,8 @@ brw_abs_immediate(enum brw_reg_type type,
> > > struct brw_reg *reg)
> > > 
> > > case BRW_REGISTER_TYPE_V:
> > > 
> > >assert(!"unimplemented: abs V immediate");
> > > 
> > > case BRW_REGISTER_TYPE_HF:
> > > 
> > > -  assert(!"unimplemented: abs HF immediate");
> > > 
> > > +  reg->ud &= ~0x80008000;
> > > 
> > > +  return true;
> > > 
> > > case BRW_REGISTER_TYPE_NF:
> > > 
> > >unreachable("no NF immediates");
> > > 
> > > }
> > > 
> > > -- 
> > > 
> > > 2.14.3
> > > 
> > > 
> > > ___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/genxml: recognize 0x, 0o and 0b when setting default value

2018-05-03 Thread Lionel Landwerlin

Nice addition :)

Reviewed-by: Lionel Landwerlin 

On 02/05/18 22:48, Caio Marcelo de Oliveira Filho wrote:

Remove the need of converting values that are documented in
hexadecimal. This patch would allow writing

 

instead of

 
---
  src/intel/genxml/gen_pack_header.py | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/genxml/gen_pack_header.py 
b/src/intel/genxml/gen_pack_header.py
index 8989f625d3..6a4c8033a7 100644
--- a/src/intel/genxml/gen_pack_header.py
+++ b/src/intel/genxml/gen_pack_header.py
@@ -241,7 +241,8 @@ class Field(object):
  self.prefix = None
  
  if "default" in attrs:

-self.default = int(attrs["default"])
+# Base 0 recognizes 0x, 0o, 0b prefixes in addition to decimal 
ints.
+self.default = int(attrs["default"], base=0)
  else:
  self.default = None
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/16] ac: enable both RBs on Kaveri

2018-05-03 Thread Michel Dänzer
On 2018-05-02 09:11 PM, Marek Olšák wrote:
> If you are shader-bound, you won't see any difference. In order to become
> RB-bound, you need more work in RBs than shaders. Blending or MSAA might
> help add that.

Indeed,

 vblank_mode=0 glxgears -samples 8

shows a significant difference in performance with amdgpu.


Unfortunately, the patch breaks MSAA with radeon, showing flickering of
incorrectly rendered tiles. So I'm afraid as-is this can still only be
done with amdgpu.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4] i965: Fix ETC2/EAC GetCompressed* functions on Gen7 GPUs

2018-05-03 Thread Eleni Maria Stea
Gen 7 GPUs store the compressed EAC/ETC2 images in other non-compressed
formats that can render. When GetCompressed* functions are called, the
pixels are returned in the non-compressed format that is used for the
rendering.

With this patch we store both the compressed and non-compressed versions
of the image, so that both rendering commands and GetCompressed*
commands work.

Also, the assertions for GL_MAP_WRITE_BIT and GL_MAP_INVALIDATE_RANGE_BIT
in intel_miptree_map_etc function have been removed because when the
miptree is mapped for reading (for example from a GetCompress*
function) the GL_MAP_WRITE_BIT won't be set (and shouldn't be set).

Fixes: the following test in CTS for gen7:
KHR-GL45.direct_state_access.textures_compressed_subimage test

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104272
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81843

v2: fixes issues:
   a) initialized uninitialized variables (Juan A. Suarez, Andres Gomez)
   b) fixed race condition where mt and cmt were mapped at the same time
   c) fixed indentation issues (Andres Gomez)
v3: adds bugzilla bug with id: 104272
v4: adds bugzilla bug with id: 81843
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  10 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  21 
 src/mesa/drivers/dri/i965/intel_tex.c | 151 ++
 src/mesa/drivers/dri/i965/intel_tex.h |   8 ++
 src/mesa/drivers/dri/i965/intel_tex_image.c   |  94 +++-
 src/mesa/drivers/dri/i965/intel_tex_obj.h |   8 ++
 6 files changed, 260 insertions(+), 32 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b9a564552d..2eba8c792c 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -734,9 +734,10 @@ miptree_create(struct brw_context *brw,
mesa_format etc_format = MESA_FORMAT_NONE;
uint32_t alloc_flags = 0;
 
-   format = intel_lower_compressed_format(brw, format);
-
-   etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
+   if (!(flags & MIPTREE_CREATE_ETC)) {
+  format = intel_lower_compressed_format(brw, format);
+  etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
+   }
 
if (flags & MIPTREE_CREATE_BUSY)
   alloc_flags |= BO_ALLOC_BUSY;
@@ -3393,9 +3394,6 @@ intel_miptree_map_etc(struct brw_context *brw,
   assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
}
 
-   assert(map->mode & GL_MAP_WRITE_BIT);
-   assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
-
map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
 map->w, map->h, 1));
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 8cea562dfa..7f70a6c341 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -405,6 +405,27 @@ enum intel_miptree_create_flags {
 * that the miptree will be created with mt->aux_usage == NONE.
 */
MIPTREE_CREATE_NO_AUX   = 1 << 2,
+
+   /** Create a second miptree for the compressed pixels (Gen7 only)
+*
+* On Gen7, we need to store 2 miptrees for some compressed
+* formats so we can handle rendering as well as getting the
+* compressed image data. This flag indicates that the miptree
+* is expected to hold compressed data for the latter case.
+*/
+   MIPTREE_CREATE_ETC  = 1 << 3,
+};
+
+enum intel_miptree_upload_flags {
+   MIPTREE_UPLOAD_DEFAULT = 0,
+
+   /** Upload the miptree that holds the compressed pixels (Gen 7 only)
+*
+* On Gen7, sometimes we need to map the miptree that stores the
+* image data for the rendering and sometimes the miptree that holds
+* the compressed data. This flag is for the latter case.
+*/
+   MIPTREE_UPLOAD_ETC,
 };
 
 struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/intel_tex.c 
b/src/mesa/drivers/dri/i965/intel_tex.c
index 0650b6e629..54a0431265 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.c
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -66,6 +66,8 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx,
struct intel_texture_image *intel_image = intel_texture_image(image);
struct gl_texture_object *texobj = image->TexObject;
struct intel_texture_object *intel_texobj = intel_texture_object(texobj);
+   struct gen_device_info *devinfo = >screen->devinfo;
+   mesa_format fmt = image->TexFormat;
 
assert(image->Border == 0);
 
@@ -110,6 +112,33 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx,
   image->Width, image->Height, image->Depth, intel_image->mt);
}
 
+   if (devinfo->gen == 7 && _mesa_is_format_etc2(fmt)) {
+  if (intel_texobj->cmt &&
+  

[Mesa-dev] [PATCH 1/2] mesa: add R10G10B10{A, X}2 to MESA <-> DRI format translations

2018-05-03 Thread Miguel Casas
This patch adds R10G10B10{A,X}2 MESA <-> DRI translation entries
in the appropriate places for dri2 functions to accept them.

BUG=https://crbug.com/776093
TEST=Compile and deploy mesa+this patch, then playback
a VP9 Profile 2 video with sw decoder using crrev.com/c/897894.
---
 src/mesa/drivers/dri/common/dri_util.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/common/dri_util.c 
b/src/mesa/drivers/dri/common/dri_util.c
index 7cb6248b13..78c6bbf234 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -886,6 +886,10 @@ driGLFormatToImageFormat(mesa_format format)
   return __DRI_IMAGE_FORMAT_ARGB2101010;
case MESA_FORMAT_B10G10R10X2_UNORM:
   return __DRI_IMAGE_FORMAT_XRGB2101010;
+   case MESA_FORMAT_R10G10B10A2_UNORM:
+  return __DRI_IMAGE_FORMAT_ABGR2101010;
+   case MESA_FORMAT_R10G10B10X2_UNORM:
+  return __DRI_IMAGE_FORMAT_XBGR2101010;
case MESA_FORMAT_B8G8R8A8_UNORM:
   return __DRI_IMAGE_FORMAT_ARGB;
case MESA_FORMAT_R8G8B8A8_UNORM:
@@ -923,6 +927,10 @@ driImageFormatToGLFormat(uint32_t image_format)
   return MESA_FORMAT_B10G10R10A2_UNORM;
case __DRI_IMAGE_FORMAT_XRGB2101010:
   return MESA_FORMAT_B10G10R10X2_UNORM;
+   case __DRI_IMAGE_FORMAT_ABGR2101010:
+  return MESA_FORMAT_R10G10B10A2_UNORM;
+   case __DRI_IMAGE_FORMAT_XBGR2101010:
+  return MESA_FORMAT_R10G10B10X2_UNORM;
case __DRI_IMAGE_FORMAT_ARGB:
   return MESA_FORMAT_B8G8R8A8_UNORM;
case __DRI_IMAGE_FORMAT_ABGR:
-- 
2.17.0.441.gb46fe60e1d-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: add {X, A}BGR2101010 to |intel_image_formats|

2018-05-03 Thread Miguel Casas
This patch adds {X,A}BGR2101010 entries to the list of supported
|intel_image_formats|.

BUG=https://crbug.com/776093
TEST=Compile and deploy mesa this patch, then playback
a VP9 Profile 2 video with sw decoder using crrev.com/c/897894.
---
 src/mesa/drivers/dri/i965/intel_screen.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 409f763b64..d3488b9f29 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -190,6 +190,12 @@ static const struct intel_image_format 
intel_image_formats[] = {
{ __DRI_IMAGE_FOURCC_XRGB2101010, __DRI_IMAGE_COMPONENTS_RGB, 1,
  { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB2101010, 4 } } },
 
+   { __DRI_IMAGE_FOURCC_ABGR2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR2101010, 4 } } },
+
+   { __DRI_IMAGE_FOURCC_XBGR2101010, __DRI_IMAGE_COMPONENTS_RGB, 1,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR2101010, 4 } } },
+
{ __DRI_IMAGE_FOURCC_ARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1,
  { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } },
 
-- 
2.17.0.441.gb46fe60e1d-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 7.5/18] intel/compiler: support negate and abs of half float immediates

2018-05-03 Thread Iago Toral
On Wed, 2018-05-02 at 17:57 -0700, Jason Ekstrand wrote:
> Reviewed-by: Jason Ekstrand 
> 
> Have I reviewed everything?  Can we land shaderInt16 now?

Yes, all patches are reviewed now, thanks Jason.I'll send the final set
of patches to Jenkins one last time and push them today  if we don't
see any unexpected results.
Iago
> --Jason
> 
> On Wed, May 2, 2018 at 5:18 PM, Jose Maria Casanova Crespo  a...@igalia.com> wrote:
> > ---
> > 
> >  src/intel/compiler/brw_shader.cpp | 6 --
> > 
> >  1 file changed, 4 insertions(+), 2 deletions(-)
> > 
> > 
> > 
> > diff --git a/src/intel/compiler/brw_shader.cpp
> > b/src/intel/compiler/brw_shader.cpp
> > 
> > index 284c2e8233c..537defd05d9 100644
> > 
> > --- a/src/intel/compiler/brw_shader.cpp
> > 
> > +++ b/src/intel/compiler/brw_shader.cpp
> > 
> > @@ -605,7 +605,8 @@ brw_negate_immediate(enum brw_reg_type type,
> > struct brw_reg *reg)
> > 
> > case BRW_REGISTER_TYPE_V:
> > 
> >assert(!"unimplemented: negate UV/V immediate");
> > 
> > case BRW_REGISTER_TYPE_HF:
> > 
> > -  assert(!"unimplemented: negate HF immediate");
> > 
> > +  reg->ud ^= 0x80008000;
> > 
> > +  return true;
> > 
> > case BRW_REGISTER_TYPE_NF:
> > 
> >unreachable("no NF immediates");
> > 
> > }
> > 
> > @@ -651,7 +652,8 @@ brw_abs_immediate(enum brw_reg_type type,
> > struct brw_reg *reg)
> > 
> > case BRW_REGISTER_TYPE_V:
> > 
> >assert(!"unimplemented: abs V immediate");
> > 
> > case BRW_REGISTER_TYPE_HF:
> > 
> > -  assert(!"unimplemented: abs HF immediate");
> > 
> > +  reg->ud &= ~0x80008000;
> > 
> > +  return true;
> > 
> > case BRW_REGISTER_TYPE_NF:
> > 
> >unreachable("no NF immediates");
> > 
> > }
> > 
> > -- 
> > 
> > 2.14.3
> > 
> > 
> > ___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/22] i965: Setup glsl uniforms by index rather than name matching

2018-05-03 Thread Timothy Arceri

On 18/04/18 00:36, Alejandro Piñeiro wrote:

From: Neil Roberts 

Previously when setting up a uniform it would try to walk the uniform
storage slots and find one that matches the name of the given
variable. However, each variable already has a location which is an
index into the UniformStorage array so we can just directly jump to
the right slot. Some of the variables take up more than one slot so we
still need to calculate how many it uses.

The main reason to do this is to support ARB_gl_spirv because in that
case the uniforms don’t have names so the previous approach won’t
work.


This is a nice improvement away :)

However it might be nicer to just update the existing 
glsl_type::uniform_locations() helper

rather than create the custom count_uniform_storage_slots() helper.

e.g

unsigned
glsl_type::uniform_locations(bool uniform_storage_locs) const
{
   unsigned size = 0;

   switch (this->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_FLOAT16:
   case GLSL_TYPE_DOUBLE:
   case GLSL_TYPE_UINT16:
   case GLSL_TYPE_UINT8:
   case GLSL_TYPE_INT16:
   case GLSL_TYPE_INT8:
   case GLSL_TYPE_UINT64:
   case GLSL_TYPE_INT64:
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
   case GLSL_TYPE_SUBROUTINE:
  return 1;

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_INTERFACE:
  for (unsigned i = 0; i < this->length; i++)
 size += this->fields.structure[i].type->uniform_locations();
  return size;
   case GLSL_TYPE_ARRAY:
  if (!uniform_storage_locs ||
  this->fields.array->is_array() ||
  this->fields.array->is_interface() ||
  this->fields.array->is_record()) {
  /* For uniform locations passed to the user via the API we
   * count all array elements.
   */
 return this->length * this->fields.array->uniform_locations();
  } else {
 /* For uniform storage the innermost array only uses a
  * single slot.
  */
 return 1;
  }
   default:
  return 0;
   }
}

If you agree this patch is:

Reviewed-by: Timothy Arceri 


---
  src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 50 +++---
  1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 
b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index 62b2951432a..cb5e07f74ba 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -118,35 +118,59 @@ brw_setup_image_uniform_values(gl_shader_stage stage,
 }
  }
  
+static unsigned

+count_uniform_storage_slots(const struct glsl_type *type)
+{
+   /* gl_uniform_storage can cope with one level of array, so if the
+* type is a composite type or an array where each element occupies
+* more than one slot than we need to recursively process it.
+*/
+   if (glsl_type_is_struct(type)) {
+  unsigned location_count = 0;
+
+  for (unsigned i = 0; i < glsl_get_length(type); i++) {
+ const struct glsl_type *field_type = glsl_get_struct_field(type, i);
+
+ location_count += count_uniform_storage_slots(field_type);
+  }
+
+  return location_count;
+   }
+
+   if (glsl_type_is_array(type)) {
+  const struct glsl_type *element_type = glsl_get_array_element(type);
+
+  if (glsl_type_is_array(element_type) ||
+  glsl_type_is_struct(element_type)) {
+ unsigned element_count = count_uniform_storage_slots(element_type);
+ return element_count * glsl_get_length(type);
+  }
+   }
+
+   return 1;
+}
+
  static void
  brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
 const struct gl_program *prog,
 struct brw_stage_prog_data *stage_prog_data,
 bool is_scalar)
  {
-   int namelen = strlen(var->name);
-
 /* The data for our (non-builtin) uniforms is stored in a series of
  * gl_uniform_storage structs for each subcomponent that
  * glGetUniformLocation() could name.  We know it's been set up in the same
-* order we'd walk the type, so walk the list of storage and find anything
-* with our name, or the prefix of a component that starts with our name.
+* order we'd walk the type, so walk the list of storage that matches the
+* range of slots covered by this variable.
  */
 unsigned uniform_index = var->data.driver_location / 4;
-   for (unsigned u = 0; u < prog->sh.data->NumUniformStorage; u++) {
+   unsigned num_slots = count_uniform_storage_slots(var->type);
+   for (unsigned u = 0; u < num_slots; u++) {
struct gl_uniform_storage *storage =
- >sh.data->UniformStorage[u];
+ >sh.data->UniformStorage[var->data.location + u];
  
if (storage->builtin || storage->type->is_sampler())

   continue;
  
-  if