Re: [Mesa-dev] [PATCH 02/10] intel/fs: Implement quad swizzles on ICL+.

2019-01-07 Thread Iago Toral
On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
> Align16 is no longer a thing, so a new implementation is provided
> using Align1 instead.  Not all possible swizzles can be represented
> as
> a single Align1 region, but some fast paths are provided for
> frequently used swizzles that can be represented efficiently in
> Align1
> mode.
> 
> Fixes ~90 subgroup quad swap Vulkan CTS tests.
> 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/intel/compiler/brw_fs.cpp   | 25 +++-
>  src/intel/compiler/brw_fs.h |  4 ++
>  src/intel/compiler/brw_fs_generator.cpp | 82 ---
> --
>  3 files changed, 93 insertions(+), 18 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs.cpp
> b/src/intel/compiler/brw_fs.cpp
> index 2f0f0151219..97544fdf465 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -315,6 +315,20 @@ fs_inst::has_source_and_destination_hazard()
> const
> * may stomp all over it.
> */
>return true;
> +   case SHADER_OPCODE_QUAD_SWIZZLE:
> +  switch (src[1].ud) {

Maybe it is worth adding a small comment here indicating that these are
the cases where we implement the opcode as a single instruction and
refer to the generator for details?

> +  case BRW_SWIZZLE_XXXX:
> +  case BRW_SWIZZLE_YYYY:
> +  case BRW_SWIZZLE_ZZZZ:
> +  case BRW_SWIZZLE_WWWW:
> +  case BRW_SWIZZLE_XXZZ:
> +  case BRW_SWIZZLE_YYWW:
> +  case BRW_SWIZZLE_XYXY:
> +  case BRW_SWIZZLE_ZWZW:
> + return false;
> +  default:
> + return !is_uniform(src[0]);

Shouldn't this be:

return !is_uniform(src[0]) ||
   (devinfo->gen < 11 && type_sz(src.type) == 4);

Since in that case we also implement the opcode with a single ALIGN16
instruction.

> +  }
> default:
>/* The SIMD16 compressed instruction
> *
> @@ -5579,9 +5593,14 @@ get_lowered_simd_width(const struct
> gen_device_info *devinfo,
> case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
>return MIN2(8, inst->exec_size);
>  
> -   case SHADER_OPCODE_QUAD_SWIZZLE:
> -  return 8;
> -
> +   case SHADER_OPCODE_QUAD_SWIZZLE: {
> +  const unsigned swiz = inst->src[1].ud;
> +  return (is_uniform(inst->src[0]) ?
> + get_fpu_lowered_simd_width(devinfo, inst) :
> +  devinfo->gen < 11 && type_sz(inst->src[0].type) == 4 ?
> 8 :
> +  swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ?
> 4 :
> +  get_fpu_lowered_simd_width(devinfo, inst));
> +   }
> case SHADER_OPCODE_MOV_INDIRECT: {
>/* From IVB and HSW PRMs:
> *
> diff --git a/src/intel/compiler/brw_fs.h
> b/src/intel/compiler/brw_fs.h
> index 53d9b6ce7bf..dc36ecc21ac 100644
> --- a/src/intel/compiler/brw_fs.h
> +++ b/src/intel/compiler/brw_fs.h
> @@ -480,6 +480,10 @@ private:
>   struct brw_reg src,
>   struct brw_reg idx);
>  
> +   void generate_quad_swizzle(const fs_inst *inst,
> +  struct brw_reg dst, struct brw_reg
> src,
> +  unsigned swiz);
> +
> bool patch_discard_jumps_to_fb_writes();
>  
> const struct brw_compiler *compiler;
> diff --git a/src/intel/compiler/brw_fs_generator.cpp
> b/src/intel/compiler/brw_fs_generator.cpp
> index 08dd83dded7..84627e83132 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -582,6 +582,72 @@ fs_generator::generate_shuffle(fs_inst *inst,
> }
>  }
>  
> +void
> +fs_generator::generate_quad_swizzle(const fs_inst *inst,
> +struct brw_reg dst, struct
> brw_reg src,
> +unsigned swiz)
> +{
> +   /* Requires a quad. */
> +   assert(inst->exec_size >= 4);
> +
> +   if (src.file == BRW_IMMEDIATE_VALUE ||
> +   has_scalar_region(src)) {
> +  /* The value is uniform across all channels */
> +  brw_MOV(p, dst, src);
> +
> +   } else if (devinfo->gen < 11 && type_sz(src.type) == 4) {
> +  /* This only works on 8-wide 32-bit values */
> +  assert(inst->exec_size == 8);
> +  assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
> +  assert(src.vstride == src.width + 1);
> +  brw_set_default_access_mode(p, BRW_ALIGN_16);
> +  struct brw_reg swiz_src = stride(src, 4, 4, 1);
> +  swiz_src.swizzle = swiz;
> +  brw_MOV(p, dst, swiz_src);
> +

Extra blank line.

> +   } else {
> +  assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
> +  assert(src.vstride == src.width + 1);
> +  const struct brw_reg src_0 = suboffset(src, BRW_GET_SWZ(swiz,
> 0));
> +
> +  switch (swiz) {
> +  case BRW_SWIZZLE_XXXX:
> +  case BRW_SWIZZLE_YYYY:
> +  case BRW_SWIZZLE_ZZZZ:
> +  case BRW_SWIZZLE_WWWW:
> + brw_MOV(p, dst, stride(src_0, 4, 4, 0));
> + break;
> +
> +  case BRW_SWIZZLE_XXZZ:
> +  case BRW_SWIZZLE_YYWW:
> + 

Re: [Mesa-dev] [PATCH 08/10] intel/fs: Remove existing lower_conversions pass.

2019-01-07 Thread Iago Toral
On Sat, 2018-12-29 at 12:39 -0800, Francisco Jerez wrote:
> It's redundant with the functionality provided by lower_regioning
> now.
> ---
>  src/intel/Makefile.sources|   1 -
>  src/intel/compiler/brw_fs.cpp |   1 -
>  src/intel/compiler/brw_fs.h   |   1 -
>  .../compiler/brw_fs_lower_conversions.cpp | 132 
> --
>  src/intel/compiler/meson.build|   1 -
>  5 files changed, 136 deletions(-)
>  delete mode 100644 src/intel/compiler/brw_fs_lower_conversions.cpp
> 
> diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
> index 6b9874d2b80..fe06a57b42e 100644
> --- a/src/intel/Makefile.sources
> +++ b/src/intel/Makefile.sources
> @@ -62,7 +62,6 @@ COMPILER_FILES = \
>   compiler/brw_fs.h \
>   compiler/brw_fs_live_variables.cpp \
>   compiler/brw_fs_live_variables.h \
> - compiler/brw_fs_lower_conversions.cpp \
>   compiler/brw_fs_lower_pack.cpp \
>   compiler/brw_fs_lower_regioning.cpp \
>   compiler/brw_fs_nir.cpp \
> diff --git a/src/intel/compiler/brw_fs.cpp
> b/src/intel/compiler/brw_fs.cpp
> index caa7a798332..d6280d558ec 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -6472,7 +6472,6 @@ fs_visitor::optimize()
> }
>  
> progress = false;
> -   OPT(lower_conversions);
> OPT(lower_regioning);
> if (progress) {
>OPT(opt_copy_propagation);

If you didn't do this in the previous patch, then maybe do it here:

if (OPT(lower_regioning)) {
   ...
}

and avoid resetting progress.

> diff --git a/src/intel/compiler/brw_fs.h
> b/src/intel/compiler/brw_fs.h
> index 36825754931..7edaa3af43c 100644
> --- a/src/intel/compiler/brw_fs.h
> +++ b/src/intel/compiler/brw_fs.h
> @@ -165,7 +165,6 @@ public:
> bool lower_load_payload();
> bool lower_pack();
> bool lower_regioning();
> -   bool lower_conversions();
> bool lower_logical_sends();
> bool lower_integer_multiplication();
> bool lower_minmax();
> diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp
> b/src/intel/compiler/brw_fs_lower_conversions.cpp
> deleted file mode 100644
> index 145fb55f995..000
> --- a/src/intel/compiler/brw_fs_lower_conversions.cpp
> +++ /dev/null
> @@ -1,132 +0,0 @@
> -/*
> - * Copyright © 2015 Connor Abbott
> - *
> - * Permission is hereby granted, free of charge, to any person
> obtaining a
> - * copy of this software and associated documentation files (the
> "Software"),
> - * to deal in the Software without restriction, including without
> limitation
> - * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom
> the
> - * Software is furnished to do so, subject to the following
> conditions:
> - *
> - * The above copyright notice and this permission notice (including
> the next
> - * paragraph) shall be included in all copies or substantial
> portions of the
> - * Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
> OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> OTHER DEALINGS
> - * IN THE SOFTWARE.
> - */
> -
> -#include "brw_fs.h"
> -#include "brw_cfg.h"
> -#include "brw_fs_builder.h"
> -
> -using namespace brw;
> -
> -static bool
> -supports_type_conversion(const fs_inst *inst) {
> -   switch (inst->opcode) {
> -   case BRW_OPCODE_MOV:
> -   case SHADER_OPCODE_MOV_INDIRECT:
> -  return true;
> -   case BRW_OPCODE_SEL:
> -  return inst->dst.type == get_exec_type(inst);
> -   default:
> -  /* FIXME: We assume the opcodes don't explicitly mentioned
> -   * before just work fine with arbitrary conversions.
> -   */
> -  return true;
> -   }
> -}
> -
> -/* From the SKL PRM Vol 2a, "Move":
> - *
> - *"A mov with the same source and destination type, no source
> modifier,
> - * and no saturation is a raw move. A packed byte destination
> region (B
> - * or UB type with HorzStride == 1 and ExecSize > 1) can only be
> written
> - * using raw move."
> - */
> -static bool
> -is_byte_raw_mov (const fs_inst *inst)
> -{
> -   return type_sz(inst->dst.type) == 1 &&
> -  inst->opcode == BRW_OPCODE_MOV &&
> -  inst->src[0].type == inst->dst.type &&
> -  !inst->saturate &&
> -  !inst->src[0].negate &&
> -  !inst->src[0].abs;
> -}
> -
> -bool
> -fs_visitor::lower_conversions()
> -{
> -   bool progress = false;
> -
> -   foreach_block_and_inst(block, fs_inst, inst, cfg) {
> -  const fs_builder ibld(this, block, inst);
> -  fs_reg dst = inst->dst;
> -  bool 

[Mesa-dev] [PATCH 4/6] anv/blorp: Refactor MSAA resolves into an exportable helper function

2019-01-07 Thread Jason Ekstrand
This function is modeled after the aux_op functions except that it has a
lot more parameters because it deals with two images as well as source
and destination regions.
---
 src/intel/vulkan/anv_blorp.c   | 225 ++---
 src/intel/vulkan/anv_private.h |  14 ++
 2 files changed, 107 insertions(+), 132 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index eee7a8c3b3c..2f8d502e289 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1169,63 +1169,52 @@ enum subpass_stage {
SUBPASS_STAGE_RESOLVE,
 };
 
-static void
-resolve_surface(struct blorp_batch *batch,
-struct blorp_surf *src_surf,
-uint32_t src_level, uint32_t src_layer,
-struct blorp_surf *dst_surf,
-uint32_t dst_level, uint32_t dst_layer,
-uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y,
-uint32_t width, uint32_t height,
-enum blorp_filter filter)
-{
-   blorp_blit(batch,
-  src_surf, src_level, src_layer,
-  ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
-  dst_surf, dst_level, dst_layer,
-  ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
-  src_x, src_y, src_x + width, src_y + height,
-  dst_x, dst_y, dst_x + width, dst_y + height,
-  filter, false, false);
-}
-
-static void
-resolve_image(struct anv_device *device,
-  struct blorp_batch *batch,
-  const struct anv_image *src_image,
-  VkImageLayout src_image_layout,
-  uint32_t src_level, uint32_t src_layer,
-  const struct anv_image *dst_image,
-  VkImageLayout dst_image_layout,
-  uint32_t dst_level, uint32_t dst_layer,
-  VkImageAspectFlags aspect_mask,
-  uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y,
-  uint32_t width, uint32_t height)
+void
+anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
+   const struct anv_image *src_image,
+   enum isl_aux_usage src_aux_usage,
+   uint32_t src_level, uint32_t src_base_layer,
+   const struct anv_image *dst_image,
+   enum isl_aux_usage dst_aux_usage,
+   uint32_t dst_level, uint32_t dst_base_layer,
+   VkImageAspectFlagBits aspect,
+   uint32_t src_x, uint32_t src_y,
+   uint32_t dst_x, uint32_t dst_y,
+   uint32_t width, uint32_t height,
+   uint32_t layer_count,
+   enum blorp_filter filter)
 {
-   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
+   struct blorp_batch batch;
+   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
 
assert(src_image->type == VK_IMAGE_TYPE_2D);
assert(src_image->samples > 1);
assert(dst_image->type == VK_IMAGE_TYPE_2D);
assert(dst_image->samples == 1);
assert(src_image->n_planes == dst_image->n_planes);
+   assert(!src_image->format->can_ycbcr);
+   assert(!dst_image->format->can_ycbcr);
 
-   uint32_t aspect_bit;
-
-   anv_foreach_image_aspect_bit(aspect_bit, src_image, aspect_mask) {
-  struct blorp_surf src_surf, dst_surf;
-  get_blorp_surf_for_anv_image(device, src_image, 1UL << aspect_bit,
-   src_image_layout, ISL_AUX_USAGE_NONE,
-   &src_surf);
-  get_blorp_surf_for_anv_image(device, dst_image, 1UL << aspect_bit,
-   dst_image_layout, ISL_AUX_USAGE_NONE,
-   &dst_surf);
-  anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
-1UL << aspect_bit,
-dst_surf.aux_usage,
-dst_level, dst_layer, 1);
-
-  enum blorp_filter filter;
+   struct blorp_surf src_surf, dst_surf;
+   get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
+ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
+src_aux_usage, &src_surf);
+   if (src_aux_usage == ISL_AUX_USAGE_MCS) {
+  src_surf.clear_color_addr = anv_to_blorp_address(
+ anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
+VK_IMAGE_ASPECT_COLOR_BIT));
+   }
+   get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect,
+ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
+dst_aux_usage, &dst_surf);
+   anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
+ aspect, dst_aux_usage,
+ dst_level, dst_base_layer, layer_count);
+
+   if (filter == BLORP_FILTER_NONE) {
+  /* If no 

[Mesa-dev] [PATCH 2/6] intel/blorp: Add two more filter modes

2019-01-07 Thread Jason Ekstrand
---
 src/intel/blorp/blorp.h  |  2 ++
 src/intel/blorp/blorp_blit.c | 62 +---
 2 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 1e22712602d..bb83650092a 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -125,6 +125,8 @@ enum blorp_filter {
BLORP_FILTER_BILINEAR,
BLORP_FILTER_SAMPLE_0,
BLORP_FILTER_AVERAGE,
+   BLORP_FILTER_MIN_SAMPLE,
+   BLORP_FILTER_MAX_SAMPLE,
 };
 
 void
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index a5da7d015e2..7b50de6d53e 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -588,10 +588,11 @@ static inline int count_trailing_one_bits(unsigned value)
 }
 
 static nir_ssa_def *
-blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v,
-   nir_ssa_def *pos, unsigned tex_samples,
-   enum isl_aux_usage tex_aux_usage,
-   nir_alu_type dst_type)
+blorp_nir_combine_samples(nir_builder *b, struct brw_blorp_blit_vars *v,
+  nir_ssa_def *pos, unsigned tex_samples,
+  enum isl_aux_usage tex_aux_usage,
+  nir_alu_type dst_type,
+  enum blorp_filter filter)
 {
/* If non-null, this is the outer-most if statement */
nir_if *outer_if = NULL;
@@ -603,6 +604,35 @@ blorp_nir_manual_blend_average(nir_builder *b, struct 
brw_blorp_blit_vars *v,
if (tex_aux_usage == ISL_AUX_USAGE_MCS)
   mcs = blorp_blit_txf_ms_mcs(b, v, pos);
 
+   nir_op combine_op;
+   switch (filter) {
+   case BLORP_FILTER_AVERAGE:
+  assert(dst_type == nir_type_float);
+  combine_op = nir_op_fadd;
+  break;
+
+   case BLORP_FILTER_MIN_SAMPLE:
+  switch (dst_type) {
+  case nir_type_int:   combine_op = nir_op_imin;  break;
+  case nir_type_uint:  combine_op = nir_op_umin;  break;
+  case nir_type_float: combine_op = nir_op_fmin;  break;
+  default: unreachable("Invalid dst_type");
+  }
+  break;
+
+   case BLORP_FILTER_MAX_SAMPLE:
+  switch (dst_type) {
+  case nir_type_int:   combine_op = nir_op_imax;  break;
+  case nir_type_uint:  combine_op = nir_op_umax;  break;
+  case nir_type_float: combine_op = nir_op_fmax;  break;
+  default: unreachable("Invalid dst_type");
+  }
+  break;
+
+   default:
+  unreachable("Invalid filter");
+   }
+
/* We add together samples using a binary tree structure, e.g. for 4x MSAA:
 *
 *   result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4
@@ -689,18 +719,22 @@ blorp_nir_manual_blend_average(nir_builder *b, struct 
brw_blorp_blit_vars *v,
  assert(stack_depth >= 2);
  --stack_depth;
 
- assert(dst_type == nir_type_float);
  texture_data[stack_depth - 1] =
-nir_fadd(b, texture_data[stack_depth - 1],
-texture_data[stack_depth]);
+nir_build_alu(b, combine_op,
+ texture_data[stack_depth - 1],
+ texture_data[stack_depth],
+ NULL, NULL);
   }
}
 
/* We should have just 1 sample on the stack now. */
assert(stack_depth == 1);
 
-   texture_data[0] = nir_fmul(b, texture_data[0],
-  nir_imm_float(b, 1.0 / tex_samples));
+   if (filter == BLORP_FILTER_AVERAGE) {
+  assert(dst_type == nir_type_float);
+  texture_data[0] = nir_fmul(b, texture_data[0],
+ nir_imm_float(b, 1.0 / tex_samples));
+   }
 
nir_store_var(b, color, texture_data[0], 0xf);
 
@@ -1351,6 +1385,8 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, 
void *mem_ctx,
   break;
 
case BLORP_FILTER_AVERAGE:
+   case BLORP_FILTER_MIN_SAMPLE:
+   case BLORP_FILTER_MAX_SAMPLE:
   assert(!key->src_tiled_w);
   assert(key->tex_samples == key->src_samples);
   assert(key->tex_layout == key->src_layout);
@@ -1369,15 +1405,17 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, 
void *mem_ctx,
   * to multiply our X and Y coordinates each by 2 and then add 1.
   */
  assert(key->src_coords_normalized);
+ assert(key->filter == BLORP_FILTER_AVERAGE);
  src_pos = nir_fadd(&b,
 nir_i2f32(&b, src_pos),
 nir_imm_float(&b, 0.5f));
  color = blorp_nir_tex(&b, &v, key, src_pos);
   } else {
  /* Gen7+ hardware doesn't automaticaly blend. */
- color = blorp_nir_manual_blend_average(&b, &v, src_pos, 
key->src_samples,
-key->tex_aux_usage,
-key->texture_data_type);
+ color = blorp_nir_combine_samples(&b, &v, src_pos, key->src_samples,
+   

Re: [Mesa-dev] [PATCH] radv: skip draws with instance_count == 0

2019-01-07 Thread Samuel Pitoiset


On 1/7/19 2:53 PM, Bas Nieuwenhuizen wrote:

On Mon, Jan 7, 2019 at 2:45 PM Samuel Pitoiset
 wrote:

Loosely based on RadeonSI.

Signed-off-by: Samuel Pitoiset 
---
  src/amd/vulkan/radv_cmd_buffer.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 1966098e08c..8e3f1a7f5f0 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3697,6 +3697,20 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
 radeon_check_space(cmd_buffer->device->ws,
cmd_buffer->cs, 4096);

+   if (likely(!info->indirect)) {
+   /* SI-CI treat instance_count==0 as instance_count==1. There is
+* no workaround for indirect draws, but we can at least skip
+* direct draws.
+*/
+   if (unlikely(!info->instance_count))
+   return;
+
+   /* Handle count == 0. */
+   if (unlikely(!info->count &&
+(info->indexed || !info->strmout_buffer)))

Why the extra condition of (info->indexed || !info->strmout_buffer)?


We need to check info->strmout_buffer, otherwise all
CmdDrawIndirectByteCountEXT() calls will be skipped.


Though, it's actually useless to check for indexed draws.




+   return;
+   }
+
 /* Use optimal packet order based on whether we need to sync the
  * pipeline.
  */
--
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/12] nir/vtn: add caps for some cl related capabilities

2019-01-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 12:26 PM Karol Herbst  wrote:

> From: Rob Clark 
>
> vtn supports these, so don't squawk if user is happy with enabling
> these.
>
> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/shader_info.h |  3 +++
>  src/compiler/spirv/spirv_to_nir.c  | 17 ++---
>  src/compiler/spirv/vtn_variables.c |  6 --
>  3 files changed, 21 insertions(+), 5 deletions(-)
>
> diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
> index 65bc0588d67..5286cf8fc5f 100644
> --- a/src/compiler/shader_info.h
> +++ b/src/compiler/shader_info.h
> @@ -62,6 +62,9 @@ struct spirv_supported_capabilities {
> bool post_depth_coverage;
> bool transform_feedback;
> bool geometry_streams;
> +   bool address;
> +   bool kernel;
> +   bool int8;
>

I really hate the fact that this list isn't sorted in any meaningful way.
I just sent out a patch to sort it.  With that fixed and these put in the
right order, the first three patches are

Reviewed-by: Jason Ekstrand 


>  };
>
>  typedef struct shader_info {
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 2c2dbe12a3c..e41a7e960ce 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -783,8 +783,10 @@ struct_member_decoration_cb(struct vtn_builder *b,
> case SpvDecorationFPRoundingMode:
> case SpvDecorationFPFastMathMode:
> case SpvDecorationAlignment:
> -  vtn_warn("Decoration only allowed for CL-style kernels: %s",
> -   spirv_decoration_to_string(dec->decoration));
> +  if (b->shader->info.stage != MESA_SHADER_KERNEL) {
> + vtn_warn("Decoration only allowed for CL-style kernels: %s",
> +  spirv_decoration_to_string(dec->decoration));
> +  }
>break;
>
> case SpvDecorationHlslSemanticGOOGLE:
> @@ -3420,7 +3422,6 @@ vtn_handle_preamble_instruction(struct vtn_builder
> *b, SpvOp opcode,
>case SpvCapabilityFloat16:
>case SpvCapabilityInt64Atomics:
>case SpvCapabilityStorageImageMultisample:
> -  case SpvCapabilityInt8:
>case SpvCapabilitySparseResidency:
>case SpvCapabilityMinLod:
>   vtn_warn("Unsupported SPIR-V capability: %s",
> @@ -3449,8 +3450,18 @@ vtn_handle_preamble_instruction(struct vtn_builder
> *b, SpvOp opcode,
>   spv_check_supported(geometry_streams, cap);
>   break;
>
> +  case SpvCapabilityInt8:
> + spv_check_supported(int8, cap);
> + break;
> +
>case SpvCapabilityAddresses:
> + spv_check_supported(address, cap);
> + break;
> +
>case SpvCapabilityKernel:
> + spv_check_supported(kernel, cap);
> + break;
> +
>case SpvCapabilityImageBasic:
>case SpvCapabilityImageReadWrite:
>case SpvCapabilityImageMipmap:
> diff --git a/src/compiler/spirv/vtn_variables.c
> b/src/compiler/spirv/vtn_variables.c
> index 55721fc36e3..fe44e71800d 100644
> --- a/src/compiler/spirv/vtn_variables.c
> +++ b/src/compiler/spirv/vtn_variables.c
> @@ -1381,8 +1381,10 @@ apply_var_decoration(struct vtn_builder *b,
> case SpvDecorationFPRoundingMode:
> case SpvDecorationFPFastMathMode:
> case SpvDecorationAlignment:
> -  vtn_warn("Decoration only allowed for CL-style kernels: %s",
> -   spirv_decoration_to_string(dec->decoration));
> +  if (b->shader->info.stage != MESA_SHADER_KERNEL) {
> + vtn_warn("Decoration only allowed for CL-style kernels: %s",
> +  spirv_decoration_to_string(dec->decoration));
> +  }
>break;
>
> case SpvDecorationHlslSemanticGOOGLE:
> --
> 2.19.2
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] vulkan: Update the XML and headers to 1.1.97

2019-01-07 Thread Samuel Pitoiset

Acked-by: Samuel Pitoiset 

On 1/7/19 4:39 PM, Jason Ekstrand wrote:

---
  include/vulkan/vulkan.h |   2 +-
  include/vulkan/vulkan_android.h |   2 +-
  include/vulkan/vulkan_core.h| 155 +++-
  include/vulkan/vulkan_fuchsia.h |   2 +-
  include/vulkan/vulkan_ios.h |   2 +-
  include/vulkan/vulkan_macos.h   |   2 +-
  include/vulkan/vulkan_vi.h  |   2 +-
  include/vulkan/vulkan_wayland.h |   2 +-
  include/vulkan/vulkan_win32.h   |   2 +-
  include/vulkan/vulkan_xcb.h |   2 +-
  include/vulkan/vulkan_xlib.h|   2 +-
  include/vulkan/vulkan_xlib_xrandr.h |   2 +-
  src/vulkan/registry/vk.xml  | 175 +++-
  13 files changed, 311 insertions(+), 41 deletions(-)

diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h
index 77da63783e6..a3be4af6c46 100644
--- a/include/vulkan/vulkan.h
+++ b/include/vulkan/vulkan.h
@@ -2,7 +2,7 @@
  #define VULKAN_H_ 1
  
  /*

-** Copyright (c) 2015-2018 The Khronos Group Inc.
+** Copyright (c) 2015-2019 The Khronos Group Inc.
  **
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
diff --git a/include/vulkan/vulkan_android.h b/include/vulkan/vulkan_android.h
index 07aaeda28e3..e70376c8867 100644
--- a/include/vulkan/vulkan_android.h
+++ b/include/vulkan/vulkan_android.h
@@ -6,7 +6,7 @@ extern "C" {
  #endif
  
  /*

-** Copyright (c) 2015-2018 The Khronos Group Inc.
+** Copyright (c) 2015-2019 The Khronos Group Inc.
  **
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
diff --git a/include/vulkan/vulkan_core.h b/include/vulkan/vulkan_core.h
index 72542c72ec8..caeecd9bed1 100644
--- a/include/vulkan/vulkan_core.h
+++ b/include/vulkan/vulkan_core.h
@@ -6,7 +6,7 @@ extern "C" {
  #endif
  
  /*

-** Copyright (c) 2015-2018 The Khronos Group Inc.
+** Copyright (c) 2015-2019 The Khronos Group Inc.
  **
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ extern "C" {
  #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
  #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
  // Version of this file
-#define VK_HEADER_VERSION 96
+#define VK_HEADER_VERSION 97
  
  
  #define VK_NULL_HANDLE 0

@@ -148,6 +148,7 @@ typedef enum VkResult {
  VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT = -1000158000,
  VK_ERROR_FRAGMENTATION_EXT = -1000161000,
  VK_ERROR_NOT_PERMITTED_EXT = -1000174001,
+VK_ERROR_INVALID_DEVICE_ADDRESS_EXT = -1000244000,
  VK_ERROR_OUT_OF_POOL_MEMORY_KHR = VK_ERROR_OUT_OF_POOL_MEMORY,
  VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR = VK_ERROR_INVALID_EXTERNAL_HANDLE,
  VK_RESULT_BEGIN_RANGE = VK_ERROR_FRAGMENTED_POOL,
@@ -444,6 +445,8 @@ typedef enum VkStructureType {
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT = 
1000190002,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR = 1000196000,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR = 
1000197000,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR = 
1000199000,
+VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR = 
1000199001,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV 
= 1000201000,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV = 1000202000,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV = 1000202001,
@@ -460,7 +463,14 @@ typedef enum VkStructureType {
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT = 
1000218001,
  VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT = 
1000218002,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT = 
1000221000,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT = 
1000237000,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT = 
1000238000,
+VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT = 1000238001,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT = 1000244000,
+VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT = 1000244001,
+VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT = 1000244002,
  VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO_EXT = 1000246000,
+VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT = 1000247000,
  VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = 
VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
  VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = 
VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES,
@@ -1539,6 +1549,7 @@ typedef 

Re: [Mesa-dev] [PATCH] glsl: correct typo in GLSL compilation error message

2019-01-07 Thread Andres Gomez
On Mon, 2019-01-07 at 16:48 +0100, Erik Faye-Lund wrote:
> On Mon, 2019-01-07 at 15:50 +0200, Andres Gomez wrote:
> > Correct a typo introduced by
> > 037f68d81e1 ("glsl: apply align layout qualifier rules to block
> > offsets")
> > 
> > Cc: Timothy Arceri 
> > Signed-off-by: Andres Gomez 
> 
> This should probably have this tag:
> 
> Fixes: 037f68d81e1 "glsl: apply align layout qualifier rules to block
> offsets"

I didn't think it was important enough to add the tag, which will pull
it into the stable releases ... 

... it will also be more traceable with it added so, I suppose I will
do so.

> With that added:
> 
> Reviewed-by: Erik Faye-Lund 

Thanks! ☺

-- 
Br,

Andres

signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 0/6] Enable fp16 visuals and fbconfigs

2019-01-07 Thread Strasser, Kevin
On 1/7/19 8:44 AM, Tapani Pälli wrote:
> > On 1/4/19 11:56 PM, Kevin Strasser wrote:
> >> While I have run this series against Piglit, I still need to sort out
> >> test coverage for these formats. If anyone has pointers to existing
> >> tests that would be really helpful.
> >
> > dEQP (EGL module) has set of 'wide color' tests that also cover
> > 1010102 and fp16.
> 
> Having said that, it's not really a 'complete test' but at least something to
> start with. 

Right, I'm familiar with that test, looks to just check if the config gets
exposed or not. I was hoping to find some tests that were more rigorously
testing 10 bit configs that I would be able to extend for fp16. So far I
haven't found much between piglit and dEQP, but I'm still looking.

> Does kernel already support fp16?

I have a series that enables fp16 for ICL+ [1], and Ville has patches that can
support platforms prior to that. I still need to rework my series with the
review feedback I got.. mostly concerned with getting userspace ready before
trying to land that. 

Thanks,
Kevin

[1] https://patchwork.freedesktop.org/series/53213/
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] radv: remove unnecessary return in GetPhysicalDeviceMemoryProperties2()

2019-01-07 Thread Samuel Pitoiset
This function returns nothing.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 53aed1a15db..39bd47348a9 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1345,8 +1345,8 @@ void radv_GetPhysicalDeviceMemoryProperties2(
VkPhysicalDevicephysicalDevice,
VkPhysicalDeviceMemoryProperties2KHR   *pMemoryProperties)
 {
-   return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
- 
&pMemoryProperties->memoryProperties);
+   radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
+  
&pMemoryProperties->memoryProperties);
 }
 
 VkResult radv_GetMemoryHostPointerPropertiesEXT(
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radv: add support for VK_EXT_memory_budget

2019-01-07 Thread Samuel Pitoiset
A simple Vulkan extension that allows apps to query size and
usage of all exposed memory heaps.

The different usage values are not really accurate because
they are per drm-fd, but they should be close enough.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_device.c  | 44 +++
 src/amd/vulkan/radv_extensions.py |  1 +
 2 files changed, 45 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index cef3a430555..32eaeb3b226 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1352,12 +1352,56 @@ void radv_GetPhysicalDeviceMemoryProperties(
*pMemoryProperties = physical_device->memory_properties;
 }
 
+static void
+radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT 
*memoryBudget)
+{
+   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+   VkPhysicalDeviceMemoryProperties *memory_properties = 
>memory_properties;
+   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
+   uint64_t vram_size = radv_get_vram_size(device);
+   uint64_t gtt_size = device->rad_info.gart_size;
+
+   if (vram_size) {
+   memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = vram_size;
+   memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] =
+   device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
+   }
+
+   if (visible_vram_size) {
+   memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = 
visible_vram_size;
+   memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] =
+   device->ws->query_value(device->ws, 
RADEON_VRAM_VIS_USAGE);
+   }
+
+   if (gtt_size) {
+   memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = gtt_size;
+   memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] =
+   device->ws->query_value(device->ws, RADEON_GTT_USAGE);
+   }
+
+   /* The heapBudget and heapUsage values must be zero for array elements
+* greater than or equal to
+* VkPhysicalDeviceMemoryProperties::memoryHeapCount.
+*/
+   for (uint32_t i = memory_properties->memoryHeapCount; i < 
VK_MAX_MEMORY_HEAPS; i++) {
+   memoryBudget->heapBudget[i] = 0;
+   memoryBudget->heapUsage[i] = 0;
+   }
+}
+
 void radv_GetPhysicalDeviceMemoryProperties2(
VkPhysicalDevicephysicalDevice,
VkPhysicalDeviceMemoryProperties2KHR   *pMemoryProperties)
 {
radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
   
>memoryProperties);
+
+   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
+   vk_find_struct(pMemoryProperties->pNext,
+  PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
+   if (memory_budget)
+   radv_get_memory_budget_properties(physicalDevice, 
memory_budget);
 }
 
 VkResult radv_GetMemoryHostPointerPropertiesEXT(
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 9952bb9c1c6..491ed9d94c3 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -105,6 +105,7 @@ EXTENSIONS = [
 Extension('VK_EXT_external_memory_dma_buf',   1, True),
 Extension('VK_EXT_external_memory_host',  1, 
'device->rad_info.has_userptr'),
 Extension('VK_EXT_global_priority',   1, 
'device->rad_info.has_ctx_priority'),
+Extension('VK_EXT_memory_budget', 1, True),
 Extension('VK_EXT_pci_bus_info',  2, True),
 Extension('VK_EXT_sampler_filter_minmax', 1, 
'device->rad_info.chip_class >= CIK'),
 Extension('VK_EXT_scalar_block_layout',   1, 
'device->rad_info.chip_class >= CIK'),
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radv: add two small helpers for getting VRAM and visible VRAM sizes

2019-01-07 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_device.c | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 39bd47348a9..cef3a430555 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -122,19 +122,30 @@ radv_get_device_name(enum radeon_family family, char 
*name, size_t name_len)
snprintf(name, name_len, "%s%s", chip_string, llvm_string);
 }
 
+static uint64_t
+radv_get_visible_vram_size(struct radv_physical_device *device)
+{
+   return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size);
+}
+
+static uint64_t
+radv_get_vram_size(struct radv_physical_device *device)
+{
+   return device->rad_info.vram_size - radv_get_visible_vram_size(device);
+}
+
 static void
 radv_physical_device_init_mem_types(struct radv_physical_device *device)
 {
STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
-   uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
- device->rad_info.vram_vis_size);
-
+   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
+   uint64_t vram_size = radv_get_vram_size(device);
int vram_index = -1, visible_vram_index = -1, gart_index = -1;
device->memory_properties.memoryHeapCount = 0;
-   if (device->rad_info.vram_size - visible_vram_size > 0) {
+   if (vram_size > 0) {
vram_index = device->memory_properties.memoryHeapCount++;
device->memory_properties.memoryHeaps[vram_index] = 
(VkMemoryHeap) {
-   .size = device->rad_info.vram_size - visible_vram_size,
+   .size = vram_size,
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
};
}
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108594] [RADV] Graphics distortion in Evil within 1 if reflections enabled

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108594

--- Comment #4 from Rhys Perry  ---
https://reviews.llvm.org/rL350532 seems to fix The Evil Within issue for
me. Can you confirm that it fixes it for you?

Can you also see if that commit fixes the NFS Payback issue?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/16] nir: improve convert_yuv_to_rgb when fuse_ffma=true

2019-01-07 Thread Jonathan Marek

Hi,

Did you get a chance to try this? If not, I might be able to try it myself 
as I have Intel HW.


On 12/19/18 12:34 PM, Lionel Landwerlin wrote:

Hey Jonathan,

I'm kind of curious as to whether we can have a single expression that 
pretty much generates the same final code (through some of the algebraic 
lowering/optimizations).

I'll give it a try on Intel HW, see what it does.

-
Lionel

On 19/12/2018 16:39, Jonathan Marek wrote:
When ffma is available, we can use a different arrangement of 
constants to

get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7
scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 ffma.

Signed-off-by: Jonathan Marek 
---
  src/compiler/nir/nir_lower_tex.c | 62 ++--
  1 file changed, 43 insertions(+), 19 deletions(-)

diff --git a/src/compiler/nir/nir_lower_tex.c 
b/src/compiler/nir/nir_lower_tex.c

index 6a6b6c41a7..f7c821bb34 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr 
*tex,

 nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
 nir_ssa_def *a)
  {
-   nir_const_value m[3] = {
-  { .f32 = { 1.0f,  0.0f, 1.59602678f, 0.0f } },
-  { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
-  { .f32 = { 1.0f,  2.01723214f,  0.0f,    0.0f } }
-   };
-
-   nir_ssa_def *yuv =
-  nir_vec4(b,
-   nir_fmul(b, nir_imm_float(b, 1.16438356f),
-    nir_fadd(b, y, nir_imm_float(b, -16.0f / 
255.0f))),
-   nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f 
/ 255.0f)), 0),
-   nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f 
/ 255.0f)), 0),

-   nir_imm_float(b, 0.0));
-
-   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
-   nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[1]));

-   nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));
-
-   nir_ssa_def *result = nir_vec4(b, red, green, blue, a);
+   nir_ssa_def *result;
+
+
+   if (b->shader->options->fuse_ffma) {
+  nir_const_value m[4] = {
+ { .f32 = { 1.16438356f, 1.16438356f, 1.16438356f, 0.0f } },
+ { .f32 = { 0.0f,   -0.39176229f, 2.01723214f, 0.0f } },
+ { .f32 = { 1.59602678f,-0.81296764f, 0.0f,    0.0f } },
+  };
+  static const float y_off = -16.0f * 1.16438356f / 255.0f;
+  static const float sc = 128.0f / 255.0f;
+
+  nir_ssa_def *offset =
+ nir_vec4(b,
+  nir_imm_float(b, y_off - sc * 1.59602678f),
+  nir_imm_float(b, y_off + sc * (0.81296764f + 
0.39176229f)),

+  nir_imm_float(b, y_off - sc * 2.01723214f),
+  a);
+
+  result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]),
+   nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]),
+    nir_ffma(b, v, nir_build_imm(b, 4, 
32, m[2]), offset)));

+   } else {
+  nir_const_value m[3] = {
+ { .f32 = { 1.0f,  0.0f, 1.59602678f, 0.0f } },
+ { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
+ { .f32 = { 1.0f,  2.01723214f,  0.0f,    0.0f } }
+  };
+
+  nir_ssa_def *yuv =
+ nir_vec4(b,
+  nir_fmul(b, nir_imm_float(b, 1.16438356f),
+   nir_fadd(b, y, nir_imm_float(b, -16.0f / 
255.0f))),
+  nir_channel(b, nir_fadd(b, u, nir_imm_float(b, 
-128.0f / 255.0f)), 0),
+  nir_channel(b, nir_fadd(b, v, nir_imm_float(b, 
-128.0f / 255.0f)), 0),

+  nir_imm_float(b, 0.0));
+
+  nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[0]));
+  nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[1]));
+  nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[2]));

+
+  result = nir_vec4(b, red, green, blue, a);
+   }
 nir_ssa_def_rewrite_uses(>dest.ssa, nir_src_for_ssa(result));
  }




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/12] nir/spirv: handle SpvStorageClassCrossWorkgroup

2019-01-07 Thread Jason Ekstrand
This seems reasonable.  However, I'd like to ask that you land patch 5 ASAP
and wait a little while on landing this one.  Having at least a couple of
weeks with the rename but without adding back in nir_var_global with a
different meaning will increase the likelihood that merge conflicts will
result in compile errors rather than weird silent failures.

On Tue, Dec 4, 2018 at 12:27 PM Karol Herbst  wrote:

> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/nir/nir.c | 4 
>  src/compiler/nir/nir.h | 1 +
>  src/compiler/nir/nir_print.c   | 2 ++
>  src/compiler/spirv/vtn_private.h   | 1 +
>  src/compiler/spirv/vtn_variables.c | 4 
>  5 files changed, 12 insertions(+)
>
> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
> index 45c4a3e8375..7f16200015f 100644
> --- a/src/compiler/nir/nir.c
> +++ b/src/compiler/nir/nir.c
> @@ -129,6 +129,10 @@ nir_shader_add_variable(nir_shader *shader,
> nir_variable *var)
>assert(!"nir_shader_add_variable cannot be used for local
> variables");
>break;
>
> +   case nir_var_global:
> +  assert(!"nir_shader_add_variable cannot be used for global memory");
> +  break;
> +
> case nir_var_private:
>exec_list_push_tail(>globals, >node);
>break;
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 30d22fb9d7d..e9f8f15d387 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -103,6 +103,7 @@ typedef enum {
> nir_var_shader_storage  = (1 << 5),
> nir_var_system_value= (1 << 6),
> nir_var_shared  = (1 << 8),
> +   nir_var_global  = (1 << 9),
> nir_var_all = ~0,
>  } nir_variable_mode;
>
> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
> index 1d409b1da7b..f509c92e0cd 100644
> --- a/src/compiler/nir/nir_print.c
> +++ b/src/compiler/nir/nir_print.c
> @@ -413,6 +413,8 @@ get_variable_mode_str(nir_variable_mode mode, bool
> want_local_global_mode)
>return want_local_global_mode ? "private" : "";
> case nir_var_function:
>return want_local_global_mode ? "function" : "";
> +   case nir_var_global:
> +  return want_local_global_mode ? "global" : "";
> default:
>return "";
> }
> diff --git a/src/compiler/spirv/vtn_private.h
> b/src/compiler/spirv/vtn_private.h
> index b84ac2cf0b4..e380d8e82ff 100644
> --- a/src/compiler/spirv/vtn_private.h
> +++ b/src/compiler/spirv/vtn_private.h
> @@ -417,6 +417,7 @@ enum vtn_variable_mode {
> vtn_variable_mode_ssbo,
> vtn_variable_mode_push_constant,
> vtn_variable_mode_workgroup,
> +   vtn_variable_mode_cross_workgroup,
> vtn_variable_mode_input,
> vtn_variable_mode_output,
>  };
> diff --git a/src/compiler/spirv/vtn_variables.c
> b/src/compiler/spirv/vtn_variables.c
> index b911b114b70..5bf407eb8a7 100644
> --- a/src/compiler/spirv/vtn_variables.c
> +++ b/src/compiler/spirv/vtn_variables.c
> @@ -1582,6 +1582,9 @@ vtn_storage_class_to_mode(struct vtn_builder *b,
>nir_mode = nir_var_uniform;
>break;
> case SpvStorageClassCrossWorkgroup:
> +  mode = vtn_variable_mode_cross_workgroup;
> +  nir_mode = nir_var_global;
> +  break;
> case SpvStorageClassGeneric:
> default:
>vtn_fail("Unhandled variable storage class");
> @@ -1841,6 +1844,7 @@ vtn_create_variable(struct vtn_builder *b, struct
> vtn_value *val,
> case vtn_variable_mode_ubo:
> case vtn_variable_mode_ssbo:
> case vtn_variable_mode_push_constant:
> +   case vtn_variable_mode_cross_workgroup:
>/* These don't need actual variables. */
>break;
> }
> --
> 2.19.2
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radv: add support for VK_EXT_memory_budget

2019-01-07 Thread Alex Smith
Hi Samuel,

Thanks for implementing this - I've been wanting this extension for a while
so it's good it's finally available.

This is just reporting the total heap sizes as the budget, which is the
same info we already get from the basic heap properties. The way I'd
expected budget to work (and what the spec is saying as far as I can see)
is that it's an estimate of how much is available for the calling app to
use in that heap at the time of the call, so should account for current
system-wide usage of the heap by other apps. Shouldn't this be something
like (heap size - system wide usage of the heap + current app usage of the
heap)? (+ app usage since the spec says budget includes currently allocated
device memory)

Alex

On Mon, 7 Jan 2019 at 16:35, Samuel Pitoiset 
wrote:

> A simple Vulkan extension that allows apps to query size and
> usage of all exposed memory heaps.
>
> The different usage values are not really accurate because
> they are per drm-fd, but they should be close enough.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_device.c  | 44 +++
>  src/amd/vulkan/radv_extensions.py |  1 +
>  2 files changed, 45 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index cef3a430555..32eaeb3b226 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -1352,12 +1352,56 @@ void radv_GetPhysicalDeviceMemoryProperties(
> *pMemoryProperties = physical_device->memory_properties;
>  }
>
> +static void
> +radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
> +
>  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
> +{
> +   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
> +   VkPhysicalDeviceMemoryProperties *memory_properties =
> >memory_properties;
> +   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
> +   uint64_t vram_size = radv_get_vram_size(device);
> +   uint64_t gtt_size = device->rad_info.gart_size;
> +
> +   if (vram_size) {
> +   memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = vram_size;
> +   memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] =
> +   device->ws->query_value(device->ws,
> RADEON_VRAM_USAGE);
> +   }
> +
> +   if (visible_vram_size) {
> +   memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] =
> visible_vram_size;
> +   memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] =
> +   device->ws->query_value(device->ws,
> RADEON_VRAM_VIS_USAGE);
> +   }
> +
> +   if (gtt_size) {
> +   memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = gtt_size;
> +   memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] =
> +   device->ws->query_value(device->ws,
> RADEON_GTT_USAGE);
> +   }
> +
> +   /* The heapBudget and heapUsage values must be zero for array
> elements
> +* greater than or equal to
> +* VkPhysicalDeviceMemoryProperties::memoryHeapCount.
> +*/
> +   for (uint32_t i = memory_properties->memoryHeapCount; i <
> VK_MAX_MEMORY_HEAPS; i++) {
> +   memoryBudget->heapBudget[i] = 0;
> +   memoryBudget->heapUsage[i] = 0;
> +   }
> +}
> +
>  void radv_GetPhysicalDeviceMemoryProperties2(
> VkPhysicalDevicephysicalDevice,
> VkPhysicalDeviceMemoryProperties2KHR   *pMemoryProperties)
>  {
> radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
>
>  >memoryProperties);
> +
> +   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
> +   vk_find_struct(pMemoryProperties->pNext,
> +
> PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
> +   if (memory_budget)
> +   radv_get_memory_budget_properties(physicalDevice,
> memory_budget);
>  }
>
>  VkResult radv_GetMemoryHostPointerPropertiesEXT(
> diff --git a/src/amd/vulkan/radv_extensions.py
> b/src/amd/vulkan/radv_extensions.py
> index 9952bb9c1c6..491ed9d94c3 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -105,6 +105,7 @@ EXTENSIONS = [
>  Extension('VK_EXT_external_memory_dma_buf',   1, True),
>  Extension('VK_EXT_external_memory_host',  1,
> 'device->rad_info.has_userptr'),
>  Extension('VK_EXT_global_priority',   1,
> 'device->rad_info.has_ctx_priority'),
> +Extension('VK_EXT_memory_budget', 1, True),
>  Extension('VK_EXT_pci_bus_info',  2, True),
>  Extension('VK_EXT_sampler_filter_minmax', 1,
> 'device->rad_info.chip_class >= CIK'),
>  Extension('VK_EXT_scalar_block_layout',   1,
> 'device->rad_info.chip_class >= CIK'),
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> 

[Mesa-dev] [Bug 109242] [RADV] The Witcher 3 system freeze

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109242

Bug ID: 109242
   Summary: [RADV] The Witcher 3 system freeze
   Product: Mesa
   Version: 18.3
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: terapy-sess...@bk.ru
QA Contact: mesa-dev@lists.freedesktop.org

The Witcher 3 freezes the system when using the RADV driver. 
Mesa: 18.3.1
Kernel driver: amdgpu
OS: Arch
GPU: RX460

There is no such problem with AMDVLK.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109242] [RADV] The Witcher 3 system freeze

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109242

--- Comment #1 from Samuel Pitoiset  ---
Is this a recent regression for you?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] vulkan: Update the XML and headers to 1.1.97

2019-01-07 Thread Jason Ekstrand
---
 include/vulkan/vulkan.h |   2 +-
 include/vulkan/vulkan_android.h |   2 +-
 include/vulkan/vulkan_core.h| 155 +++-
 include/vulkan/vulkan_fuchsia.h |   2 +-
 include/vulkan/vulkan_ios.h |   2 +-
 include/vulkan/vulkan_macos.h   |   2 +-
 include/vulkan/vulkan_vi.h  |   2 +-
 include/vulkan/vulkan_wayland.h |   2 +-
 include/vulkan/vulkan_win32.h   |   2 +-
 include/vulkan/vulkan_xcb.h |   2 +-
 include/vulkan/vulkan_xlib.h|   2 +-
 include/vulkan/vulkan_xlib_xrandr.h |   2 +-
 src/vulkan/registry/vk.xml  | 175 +++-
 13 files changed, 311 insertions(+), 41 deletions(-)

diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h
index 77da63783e6..a3be4af6c46 100644
--- a/include/vulkan/vulkan.h
+++ b/include/vulkan/vulkan.h
@@ -2,7 +2,7 @@
 #define VULKAN_H_ 1
 
 /*
-** Copyright (c) 2015-2018 The Khronos Group Inc.
+** Copyright (c) 2015-2019 The Khronos Group Inc.
 **
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
diff --git a/include/vulkan/vulkan_android.h b/include/vulkan/vulkan_android.h
index 07aaeda28e3..e70376c8867 100644
--- a/include/vulkan/vulkan_android.h
+++ b/include/vulkan/vulkan_android.h
@@ -6,7 +6,7 @@ extern "C" {
 #endif
 
 /*
-** Copyright (c) 2015-2018 The Khronos Group Inc.
+** Copyright (c) 2015-2019 The Khronos Group Inc.
 **
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
diff --git a/include/vulkan/vulkan_core.h b/include/vulkan/vulkan_core.h
index 72542c72ec8..caeecd9bed1 100644
--- a/include/vulkan/vulkan_core.h
+++ b/include/vulkan/vulkan_core.h
@@ -6,7 +6,7 @@ extern "C" {
 #endif
 
 /*
-** Copyright (c) 2015-2018 The Khronos Group Inc.
+** Copyright (c) 2015-2019 The Khronos Group Inc.
 **
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ extern "C" {
 #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
 #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
 // Version of this file
-#define VK_HEADER_VERSION 96
+#define VK_HEADER_VERSION 97
 
 
 #define VK_NULL_HANDLE 0
@@ -148,6 +148,7 @@ typedef enum VkResult {
 VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT = -1000158000,
 VK_ERROR_FRAGMENTATION_EXT = -1000161000,
 VK_ERROR_NOT_PERMITTED_EXT = -1000174001,
+VK_ERROR_INVALID_DEVICE_ADDRESS_EXT = -1000244000,
 VK_ERROR_OUT_OF_POOL_MEMORY_KHR = VK_ERROR_OUT_OF_POOL_MEMORY,
 VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR = VK_ERROR_INVALID_EXTERNAL_HANDLE,
 VK_RESULT_BEGIN_RANGE = VK_ERROR_FRAGMENTED_POOL,
@@ -444,6 +445,8 @@ typedef enum VkStructureType {
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT = 
1000190002,
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR = 1000196000,
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR = 
1000197000,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR = 
1000199000,
+VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR = 
1000199001,
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV = 
1000201000,
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV = 1000202000,
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV = 1000202001,
@@ -460,7 +463,14 @@ typedef enum VkStructureType {
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT = 
1000218001,
 VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT = 
1000218002,
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT = 
1000221000,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT = 
1000237000,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT = 
1000238000,
+VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT = 1000238001,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT = 1000244000,
+VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT = 1000244001,
+VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT = 1000244002,
 VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO_EXT = 1000246000,
+VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT = 1000247000,
 VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = 
VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
 VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = 
VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO,
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES,
@@ -1539,6 +1549,7 @@ typedef enum VkBufferCreateFlagBits {
 VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x0002,
 VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 

[Mesa-dev] [PATCH 5/6] anv: Move resolve_subpass to genX_cmd_buffer.c

2019-01-07 Thread Jason Ekstrand
We may have to do transitions around certain kinds of resolves so it
helps to have it in genX code.
---
 src/intel/vulkan/anv_blorp.c   | 66 --
 src/intel/vulkan/anv_private.h |  2 -
 src/intel/vulkan/genX_cmd_buffer.c | 61 ++-
 3 files changed, 59 insertions(+), 70 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 2f8d502e289..469caa5e33d 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1307,72 +1307,6 @@ fast_clear_aux_usage(const struct anv_image *image,
   return image->planes[plane].aux_usage;
 }
 
-void
-anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
-   struct anv_subpass *subpass = cmd_buffer->state.subpass;
-
-   if (subpass->has_color_resolve) {
-  /* We are about to do some MSAA resolves.  We need to flush so that the
-   * result of writes to the MSAA color attachments show up in the sampler
-   * when we blit to the single-sampled resolve target.
-   */
-  cmd_buffer->state.pending_pipe_bits |=
- ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
- ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
-
-  for (uint32_t i = 0; i < subpass->color_count; ++i) {
- uint32_t src_att = subpass->color_attachments[i].attachment;
- uint32_t dst_att = subpass->resolve_attachments[i].attachment;
-
- if (dst_att == VK_ATTACHMENT_UNUSED)
-continue;
-
- assert(src_att < cmd_buffer->state.pass->attachment_count);
- assert(dst_att < cmd_buffer->state.pass->attachment_count);
-
- if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) {
-/* From the Vulkan 1.0 spec:
- *
- *If the first use of an attachment in a render pass is as a
- *resolve attachment, then the loadOp is effectively ignored
- *as the resolve is guaranteed to overwrite all pixels in the
- *render area.
- */
-cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0;
- }
-
- struct anv_image_view *src_iview = fb->attachments[src_att];
- struct anv_image_view *dst_iview = fb->attachments[dst_att];
-
- const VkRect2D render_area = cmd_buffer->state.render_area;
-
- enum isl_aux_usage src_aux_usage =
-cmd_buffer->state.attachments[src_att].aux_usage;
- enum isl_aux_usage dst_aux_usage =
-cmd_buffer->state.attachments[dst_att].aux_usage;
-
- assert(src_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT &&
-dst_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT);
-
- anv_image_msaa_resolve(cmd_buffer,
-src_iview->image, src_aux_usage,
-src_iview->planes[0].isl.base_level,
-src_iview->planes[0].isl.base_array_layer,
-dst_iview->image, dst_aux_usage,
-dst_iview->planes[0].isl.base_level,
-dst_iview->planes[0].isl.base_array_layer,
-VK_IMAGE_ASPECT_COLOR_BIT,
-render_area.offset.x, render_area.offset.y,
-render_area.offset.x, render_area.offset.y,
-render_area.extent.width,
-render_area.extent.height,
-fb->layers, BLORP_FILTER_NONE);
-  }
-   }
-}
-
 void
 anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
  const struct anv_image *image,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index a064a058822..6992db277fc 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2205,8 +2205,6 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer 
*cmd_buffer,
 struct anv_state
 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
 
-void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer);
-
 const struct anv_image_view *
 anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
 
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 93b5269c6ba..b0b56472e57 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3882,10 +3882,67 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer 
*cmd_buffer)
struct anv_cmd_state *cmd_state = _buffer->state;
struct anv_subpass *subpass = cmd_state->subpass;
uint32_t subpass_id = anv_get_subpass_id(_buffer->state);
+   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
 
-   anv_cmd_buffer_resolve_subpass(cmd_buffer);
+   if (subpass->has_color_resolve) {
+  /* We are about to do some MSAA 

[Mesa-dev] [PATCH 6/6] anv: Implement VK_KHR_depth_stencil_resolve

2019-01-07 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_device.c  |  28 ++
 src/intel/vulkan/anv_extensions.py |   1 +
 src/intel/vulkan/anv_pass.c|  37 +++-
 src/intel/vulkan/anv_private.h |   3 +
 src/intel/vulkan/genX_cmd_buffer.c | 136 +
 5 files changed, 204 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 2a3919d2949..3761846bb7f 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1138,6 +1138,34 @@ void anv_GetPhysicalDeviceProperties2(
  break;
   }
 
+  case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: {
+ VkPhysicalDeviceDepthStencilResolvePropertiesKHR *props =
+(VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext;
+
+ /* We support all of the depth resolve modes */
+ props->supportedDepthResolveModes =
+VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
+VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
+VK_RESOLVE_MODE_MIN_BIT_KHR |
+VK_RESOLVE_MODE_MAX_BIT_KHR;
+
+ /* Average doesn't make sense for stencil so we don't support that */
+ props->supportedStencilResolveModes =
+VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR;
+ if (pdevice->info.gen >= 8) {
+/* The advanced stencil resolve modes currently require stencil
+ * sampling be supported by the hardware.
+ */
+props->supportedStencilResolveModes |=
+   VK_RESOLVE_MODE_MIN_BIT_KHR |
+   VK_RESOLVE_MODE_MAX_BIT_KHR;
+ }
+
+ props->independentResolveNone = VK_TRUE;
+ props->independentResolve = VK_TRUE;
+ break;
+  }
+
   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
  VkPhysicalDeviceDriverPropertiesKHR *driver_props =
 (VkPhysicalDeviceDriverPropertiesKHR *) ext;
diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index 388845003aa..2ea4cab0e97 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -76,6 +76,7 @@ EXTENSIONS = [
 Extension('VK_KHR_bind_memory2',  1, True),
 Extension('VK_KHR_create_renderpass2',1, True),
 Extension('VK_KHR_dedicated_allocation',  1, True),
+Extension('VK_KHR_depth_stencil_resolve', 1, True),
 Extension('VK_KHR_descriptor_update_template',1, True),
 Extension('VK_KHR_device_group',  1, True),
 Extension('VK_KHR_device_group_creation', 1, True),
diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
index 7b17cc06935..196cf3ff8fd 100644
--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -74,6 +74,10 @@ anv_render_pass_compile(struct anv_render_pass *pass)
   subpass->depth_stencil_attachment->attachment == 
VK_ATTACHMENT_UNUSED)
  subpass->depth_stencil_attachment = NULL;
 
+  if (subpass->ds_resolve_attachment &&
+  subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED)
+ subpass->ds_resolve_attachment = NULL;
+
   for (uint32_t j = 0; j < subpass->attachment_count; j++) {
  struct anv_subpass_attachment *subpass_att = >attachments[j];
  if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
@@ -116,6 +120,16 @@ anv_render_pass_compile(struct anv_render_pass *pass)
 color_att->usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
  }
   }
+
+  if (subpass->ds_resolve_attachment) {
+ struct anv_subpass_attachment *ds_att =
+subpass->depth_stencil_attachment;
+ UNUSED struct anv_subpass_attachment *resolve_att =
+subpass->ds_resolve_attachment;
+
+ assert(resolve_att->usage == VK_IMAGE_USAGE_TRANSFER_DST_BIT);
+ ds_att->usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+  }
}
 
/* From the Vulkan 1.0.39 spec:
@@ -342,10 +356,15 @@ VkResult anv_CreateRenderPass(
 static unsigned
 num_subpass_attachments2(const VkSubpassDescription2KHR *desc)
 {
+   const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve =
+  vk_find_struct_const(desc->pNext,
+   SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);
+
return desc->inputAttachmentCount +
   desc->colorAttachmentCount +
   (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
-  (desc->pDepthStencilAttachment != NULL);
+  (desc->pDepthStencilAttachment != NULL) +
+  (ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
 }
 
 VkResult anv_CreateRenderPass2KHR(
@@ -460,6 +479,22 @@ VkResult anv_CreateRenderPass2KHR(
 .layout =  desc->pDepthStencilAttachment->layout,
  };
   }
+
+  const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve =
+ vk_find_struct_const(desc->pNext,
+

[Mesa-dev] [PATCH 3/6] anv: Rename has_resolve to has_color_resolve

2019-01-07 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_blorp.c   | 2 +-
 src/intel/vulkan/anv_pass.c| 4 ++--
 src/intel/vulkan/anv_private.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 478b8e7a3db..eee7a8c3b3c 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1309,7 +1309,7 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer 
*cmd_buffer)
struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
struct anv_subpass *subpass = cmd_buffer->state.subpass;
 
-   if (subpass->has_resolve) {
+   if (subpass->has_color_resolve) {
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
 
diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
index 72bd99202ec..7b17cc06935 100644
--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -100,7 +100,7 @@ anv_render_pass_compile(struct anv_render_pass *pass)
   }
 
   /* We have to handle resolve attachments specially */
-  subpass->has_resolve = false;
+  subpass->has_color_resolve = false;
   if (subpass->resolve_attachments) {
  for (uint32_t j = 0; j < subpass->color_count; j++) {
 struct anv_subpass_attachment *color_att =
@@ -110,7 +110,7 @@ anv_render_pass_compile(struct anv_render_pass *pass)
 if (resolve_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
 
-subpass->has_resolve = true;
+subpass->has_color_resolve = true;
 
 assert(resolve_att->usage == VK_IMAGE_USAGE_TRANSFER_DST_BIT);
 color_att->usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 903931472da..0b67e7598b4 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -3209,8 +3209,8 @@ struct anv_subpass {
/** Subpass has a depth/stencil self-dependency */
bool has_ds_self_dep;
 
-   /** Subpass has at least one resolve attachment */
-   bool has_resolve;
+   /** Subpass has at least one color resolve attachment */
+   bool has_color_resolve;
 };
 
 static inline unsigned
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] anv: Sort properties and features switch statements

2019-01-07 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_device.c | 84 +--
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 2a3919d2949..cd179e6801c 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -893,9 +893,26 @@ void anv_GetPhysicalDeviceFeatures2(
 
vk_foreach_struct(ext, pFeatures->pNext) {
   switch (ext->sType) {
-  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
- VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext;
- features->protectedMemory = VK_FALSE;
+  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
+ VkPhysicalDevice8BitStorageFeaturesKHR *features =
+(VkPhysicalDevice8BitStorageFeaturesKHR *)ext;
+ ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
+
+ features->storageBuffer8BitAccess = pdevice->info.gen >= 8;
+ features->uniformAndStorageBuffer8BitAccess = pdevice->info.gen >= 8;
+ features->storagePushConstant8 = pdevice->info.gen >= 8;
+ break;
+  }
+
+  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR: {
+ VkPhysicalDevice16BitStorageFeaturesKHR *features =
+(VkPhysicalDevice16BitStorageFeaturesKHR *)ext;
+ ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
+
+ features->storageBuffer16BitAccess = pdevice->info.gen >= 8;
+ features->uniformAndStorageBuffer16BitAccess = pdevice->info.gen >= 8;
+ features->storagePushConstant16 = pdevice->info.gen >= 8;
+ features->storageInputOutput16 = false;
  break;
   }
 
@@ -908,10 +925,9 @@ void anv_GetPhysicalDeviceFeatures2(
  break;
   }
 
-  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: {
- VkPhysicalDeviceVariablePointerFeatures *features = (void *)ext;
- features->variablePointersStorageBuffer = true;
- features->variablePointers = true;
+  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
+ VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext;
+ features->protectedMemory = VK_FALSE;
  break;
   }
 
@@ -935,26 +951,10 @@ void anv_GetPhysicalDeviceFeatures2(
  break;
   }
 
-  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR: {
- VkPhysicalDevice16BitStorageFeaturesKHR *features =
-(VkPhysicalDevice16BitStorageFeaturesKHR *)ext;
- ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
-
- features->storageBuffer16BitAccess = pdevice->info.gen >= 8;
- features->uniformAndStorageBuffer16BitAccess = pdevice->info.gen >= 8;
- features->storagePushConstant16 = pdevice->info.gen >= 8;
- features->storageInputOutput16 = false;
- break;
-  }
-
-  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
- VkPhysicalDevice8BitStorageFeaturesKHR *features =
-(VkPhysicalDevice8BitStorageFeaturesKHR *)ext;
- ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
-
- features->storageBuffer8BitAccess = pdevice->info.gen >= 8;
- features->uniformAndStorageBuffer8BitAccess = pdevice->info.gen >= 8;
- features->storagePushConstant8 = pdevice->info.gen >= 8;
+  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: {
+ VkPhysicalDeviceVariablePointerFeatures *features = (void *)ext;
+ features->variablePointersStorageBuffer = true;
+ features->variablePointers = true;
  break;
   }
 
@@ -1130,14 +1130,6 @@ void anv_GetPhysicalDeviceProperties2(
 
vk_foreach_struct(ext, pProperties->pNext) {
   switch (ext->sType) {
-  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
- VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
-(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
-
- properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
- break;
-  }
-
   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
  VkPhysicalDeviceDriverPropertiesKHR *driver_props =
 (VkPhysicalDeviceDriverPropertiesKHR *) ext;
@@ -1205,6 +1197,21 @@ void anv_GetPhysicalDeviceProperties2(
  break;
   }
 
+  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
+ VkPhysicalDeviceProtectedMemoryProperties *props =
+(VkPhysicalDeviceProtectedMemoryProperties *)ext;
+ props->protectedNoFault = false;
+ break;
+  }
+
+  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
+ VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
+(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
+
+ properties->maxPushDescriptors = 

[Mesa-dev] [PATCH 1/2] spirv: Sort supported capabilities

2019-01-07 Thread Jason Ekstrand
---
 src/amd/vulkan/radv_shader.c| 24 -
 src/compiler/shader_info.h  | 34 -
 src/intel/vulkan/anv_pipeline.c | 18 ++---
 src/mesa/drivers/dri/i965/brw_context.c | 10 
 4 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 7ad9abe8df8..7220738c7c8 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -220,32 +220,32 @@ radv_shader_compile_to_nir(struct radv_device *device,
}
const struct spirv_to_nir_options spirv_options = {
.caps = {
+   .descriptor_array_dynamic_indexing = true,
.device_group = true,
.draw_parameters = true,
.float64 = true,
+   .gcn_shader = true,
+   .geometry_streams = true,
.image_read_without_format = true,
.image_write_without_format = true,
-   .tessellation = true,
-   .int64 = true,
.int16 = true,
+   .int64 = true,
.multiview = true,
+   .runtime_descriptor_array = true,
+   .shader_viewport_index_layer = true,
+   .stencil_export = true,
+   .storage_16bit = true,
+   .storage_image_ms = true,
.subgroup_arithmetic = true,
.subgroup_ballot = true,
.subgroup_basic = true,
.subgroup_quad = true,
.subgroup_shuffle = true,
.subgroup_vote = true,
-   .variable_pointers = true,
-   .gcn_shader = true,
-   .trinary_minmax = true,
-   .shader_viewport_index_layer = true,
-   .descriptor_array_dynamic_indexing = true,
-   .runtime_descriptor_array = true,
-   .stencil_export = true,
-   .storage_16bit = true,
-   .geometry_streams = true,
+   .tessellation = true,
.transform_feedback = true,
-   .storage_image_ms = true,
+   .trinary_minmax = true,
+   .variable_pointers = true,
},
};
entry_point = spirv_to_nir(spirv, module->size / 4,
diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index 05f37c8d197..dc47cd7656d 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -33,38 +33,38 @@ extern "C" {
 #endif
 
 struct spirv_supported_capabilities {
-   bool float64;
-   bool image_ms_array;
-   bool tessellation;
+   bool atomic_storage;
+   bool descriptor_array_dynamic_indexing;
bool device_group;
bool draw_parameters;
+   bool float64;
+   bool geometry_streams;
+   bool gcn_shader;
+   bool image_ms_array;
bool image_read_without_format;
bool image_write_without_format;
+   bool int16;
bool int64;
+   bool int64_atomics;
bool min_lod;
bool multiview;
-   bool variable_pointers;
-   bool storage_16bit;
-   bool int16;
+   bool post_depth_coverage;
+   bool runtime_descriptor_array;
bool shader_viewport_index_layer;
+   bool stencil_export;
+   bool storage_8bit;
+   bool storage_16bit;
+   bool storage_image_ms;
bool subgroup_arithmetic;
bool subgroup_ballot;
bool subgroup_basic;
bool subgroup_quad;
bool subgroup_shuffle;
bool subgroup_vote;
-   bool gcn_shader;
-   bool trinary_minmax;
-   bool descriptor_array_dynamic_indexing;
-   bool runtime_descriptor_array;
-   bool stencil_export;
-   bool atomic_storage;
-   bool storage_8bit;
-   bool post_depth_coverage;
+   bool tessellation;
bool transform_feedback;
-   bool geometry_streams;
-   bool int64_atomics;
-   bool storage_image_ms;
+   bool trinary_minmax;
+   bool variable_pointers;
 };
 
 typedef struct shader_info {
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 6db9945e0d4..a9ae379967b 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -136,27 +136,27 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
struct spirv_to_nir_options spirv_options = {
   .lower_workgroup_access_to_offsets = true,
   .caps = {
- .float64 = 

Re: [Mesa-dev] [PATCH 05/12] nir: rename global/local to private/function memory

2019-01-07 Thread Jason Ekstrand
Assuming it builds everywhere (probably best to double-check before pushing
anything)

Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 12:27 PM Karol Herbst  wrote:

> the naming is a bit confusing no matter how you look at it. Within SPIR-V
> "global" memory is memory accessible from all threads. glsl "global" memory
> normally refers to shader thread private memory declared at global scope.
> As
> we already use "shared" for memory shared across all threads of a work group
> the solution everybody could be happy with is to rename "global" to
> "private" and use "global" later for memory usually stored within system
> accessible memory (be it VRAM or system RAM if keeping SVM in mind).
> glsl "local" memory is memory only accessible within a function, while
> SPIR-V
> "local" memory is memory accessible within the same workgroup.
>
> v2: rename local to function as well
>
> Signed-off-by: Karol Herbst 
> ---
>  src/amd/common/ac_nir_to_llvm.c   |  6 ++--
>  src/amd/vulkan/radv_shader.c  |  8 ++---
>  src/compiler/glsl/glsl_to_nir.cpp | 10 +++
>  src/compiler/nir/nir.c|  6 ++--
>  src/compiler/nir/nir.h|  8 ++---
>  src/compiler/nir/nir_linking_helpers.c|  2 +-
>  src/compiler/nir/nir_lower_clip.c |  2 +-
>  .../nir/nir_lower_constant_initializers.c |  6 ++--
>  .../nir/nir_lower_global_vars_to_local.c  |  6 ++--
>  .../nir/nir_lower_io_to_temporaries.c |  2 +-
>  src/compiler/nir/nir_lower_locals_to_regs.c   |  4 +--
>  src/compiler/nir/nir_lower_vars_to_ssa.c  |  8 ++---
>  src/compiler/nir/nir_opt_copy_prop_vars.c |  8 ++---
>  src/compiler/nir/nir_opt_dead_write_vars.c|  4 +--
>  src/compiler/nir/nir_opt_find_array_copies.c  |  4 +--
>  src/compiler/nir/nir_opt_large_constants.c| 14 -
>  src/compiler/nir/nir_print.c  |  8 ++---
>  src/compiler/nir/nir_remove_dead_variables.c  |  6 ++--
>  src/compiler/nir/nir_split_vars.c | 30 +--
>  src/compiler/nir/nir_validate.c   |  2 +-
>  src/compiler/nir/tests/vars_tests.cpp | 18 +--
>  src/compiler/spirv/vtn_cfg.c  |  2 +-
>  src/compiler/spirv/vtn_private.h  |  2 +-
>  src/compiler/spirv/vtn_variables.c|  8 ++---
>  src/freedreno/ir3/ir3_nir.c   |  2 +-
>  src/gallium/auxiliary/nir/tgsi_to_nir.c   |  2 +-
>  src/gallium/drivers/v3d/v3d_program.c |  2 +-
>  src/gallium/drivers/vc4/vc4_program.c |  4 +--
>  src/intel/compiler/brw_nir.c  | 10 +++
>  src/intel/vulkan/anv_pipeline.c   |  4 +--
>  src/mesa/main/glspirv.c   |  2 +-
>  src/mesa/state_tracker/st_glsl_to_nir.cpp |  4 +--
>  32 files changed, 102 insertions(+), 102 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index 18e9b69f3c0..2d8a27a0ab9 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1923,7 +1923,7 @@ static LLVMValueRef visit_load_var(struct
> ac_nir_context *ctx,
> values[chan] = ctx->abi->inputs[idx + chan
> + const_index * stride];
> }
> break;
> -   case nir_var_local:
> +   case nir_var_function:
> for (unsigned chan = 0; chan < ve; chan++) {
> if (indir_index) {
> unsigned count =
> glsl_count_attribute_slots(
> @@ -2055,7 +2055,7 @@ visit_store_var(struct ac_nir_context *ctx,
> }
> }
> break;
> -   case nir_var_local:
> +   case nir_var_function:
> for (unsigned chan = 0; chan < 8; chan++) {
> if (!(writemask & (1 << chan)))
> continue;
> @@ -4061,7 +4061,7 @@ ac_lower_indirect_derefs(struct nir_shader *nir,
> enum chip_class chip_class)
>  * See the following thread for more details of the problem:
>  *
> https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
>  */
> -   indirect_mask |= nir_var_local;
> +   indirect_mask |= nir_var_function;
>
> nir_lower_indirect_derefs(nir, indirect_mask);
>  }
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 456c462a230..fa15478ad2d 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -126,8 +126,8 @@ radv_optimize_nir(struct nir_shader *shader, bool
> optimize_conservatively,
>  do {
>  progress = false;
>
> -   NIR_PASS(progress, shader, nir_split_array_vars,
> nir_var_local);
> -   NIR_PASS(progress, shader, nir_shrink_vec_array_vars,
> nir_var_local);
> +   NIR_PASS(progress, shader, nir_split_array_vars,
> nir_var_function);
> +

Re: [Mesa-dev] [PATCH 04/12] nir/spirv: improve parsing of the memory model

2019-01-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 12:27 PM Karol Herbst  wrote:

> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/nir/nir.h|  8 
>  src/compiler/nir/nir_clone.c  |  1 +
>  src/compiler/nir/nir_serialize.c  |  2 ++
>  src/compiler/spirv/spirv_to_nir.c | 26 ++
>  src/compiler/spirv/vtn_private.h  |  3 +++
>  5 files changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index db935c8496b..a111e87ed71 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2249,6 +2249,14 @@ typedef struct nir_shader {
>  */
> void *constant_data;
> unsigned constant_data_size;
> +
> +   /**
> +* pointer size is:
> +*   AddressingModelLogical:0(default)
> +*   AddressingModelPhysical32: 32
> +*   AddressingModelPhysical64: 64
> +*/
> +   unsigned ptr_size;
>

I think this is worth stashing but it seems to me like it belongs in the cs
portion of shader_info along with the local workgroup size rather than
nir_shader.


>  } nir_shader;
>
>  static inline nir_function_impl *
> diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
> index 989c5051a54..d47d3e8cb72 100644
> --- a/src/compiler/nir/nir_clone.c
> +++ b/src/compiler/nir/nir_clone.c
> @@ -733,6 +733,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s)
> ns->num_uniforms = s->num_uniforms;
> ns->num_outputs = s->num_outputs;
> ns->num_shared = s->num_shared;
> +   ns->ptr_size = s->ptr_size;
>
> ns->constant_data_size = s->constant_data_size;
> if (s->constant_data_size > 0) {
> diff --git a/src/compiler/nir/nir_serialize.c
> b/src/compiler/nir/nir_serialize.c
> index 43016310048..5ec6972b02a 100644
> --- a/src/compiler/nir/nir_serialize.c
> +++ b/src/compiler/nir/nir_serialize.c
> @@ -1106,6 +1106,7 @@ nir_serialize(struct blob *blob, const nir_shader
> *nir)
> blob_write_uint32(blob, nir->num_uniforms);
> blob_write_uint32(blob, nir->num_outputs);
> blob_write_uint32(blob, nir->num_shared);
> +   blob_write_uint32(blob, nir->ptr_size);
>
> blob_write_uint32(blob, exec_list_length(&nir->functions));
> nir_foreach_function(fxn, nir) {
> @@ -1165,6 +1166,7 @@ nir_deserialize(void *mem_ctx,
> ctx.nir->num_uniforms = blob_read_uint32(blob);
> ctx.nir->num_outputs = blob_read_uint32(blob);
> ctx.nir->num_shared = blob_read_uint32(blob);
> +   ctx.nir->ptr_size = blob_read_uint32(blob);
>
> unsigned num_functions = blob_read_uint32(blob);
> for (unsigned i = 0; i < num_functions; i++)
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index e41a7e960ce..1a7d5b3a9bd 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -3581,9 +3581,27 @@ vtn_handle_preamble_instruction(struct vtn_builder
> *b, SpvOp opcode,
>break;
>
> case SpvOpMemoryModel:
> -  vtn_assert(w[1] == SpvAddressingModelLogical);
> +  switch (w[1]) {
> +  case SpvAddressingModelPhysical32:
> + b->shader->ptr_size = 32;
> + b->physical_ptrs = true;
> + break;
> +  case SpvAddressingModelPhysical64:
> + b->shader->ptr_size = 64;
> + b->physical_ptrs = true;
> + break;
> +  case SpvAddressingModelLogical:
> + b->shader->ptr_size = 0;
> + b->physical_ptrs = false;
> + break;
> +  default:
> + vtn_fail("Unknown addressing model");
> + break;
>

With my patches for giving pointers explicit types, we'll likely want to
add a type for global and function and set global/function/shared to the
explicitly defined size here.


> +  }
> +
>vtn_assert(w[2] == SpvMemoryModelSimple ||
> - w[2] == SpvMemoryModelGLSL450);
> + w[2] == SpvMemoryModelGLSL450 ||
> + w[2] == SpvMemoryModelOpenCL);
>break;
>
> case SpvOpEntryPoint:
> @@ -4258,6 +4276,8 @@ spirv_to_nir(const uint32_t *words, size_t
> word_count,
> /* Skip the SPIR-V header, handled at vtn_create_builder */
> words+= 5;
>
> +   b->shader = nir_shader_create(b, stage, nir_options, NULL);
> +
> /* Handle all the preamble instructions */
> words = vtn_foreach_instruction(b, words, word_end,
> vtn_handle_preamble_instruction);
> @@ -4268,8 +4288,6 @@ spirv_to_nir(const uint32_t *words, size_t
> word_count,
>return NULL;
> }
>
> -   b->shader = nir_shader_create(b, stage, nir_options, NULL);
> -
> /* Set shader info defaults */
> b->shader->info.gs.invocations = 1;
>
> diff --git a/src/compiler/spirv/vtn_private.h
> b/src/compiler/spirv/vtn_private.h
> index da7a04ce59f..47f26dac642 100644
> --- a/src/compiler/spirv/vtn_private.h
> +++ b/src/compiler/spirv/vtn_private.h
> @@ -605,6 +605,9 @@ struct vtn_builder {
> unsigned func_param_idx;
>
> bool has_loop_continue;
> +
> +   /* when a physical 

Re: [Mesa-dev] [PATCH] glx: Fix compilation with GLX_USE_WINDOWSGL

2019-01-07 Thread Jon Turney

On 14/12/2018 19:22, Jon Turney wrote:

On 14/12/2018 15:15, Emil Velikov wrote:
On Fri, 14 Dec 2018 at 14:56, Jon Turney  
wrote:


Sadly, the GLX_USE_APPLEGL and GLX_USE_WINDOWSGL cases are not identical
(because GLX_USE_WINDOWSGL uses vtables rather than a maze of ifdefs)

Include  again, as functions prototyped by it are used in
the GLX_USE_WINDOWSGL path.

Make the include guard around the __glxGetMscRate() definition match the
one at its declaration again, as it's referenced from dri_common.c
which is built for GLX_USE_WINDOWSGL.

Cc: Emil Velikov 
Fixes: a95ec138 ("glx: mandate xf86vidmode only for "drm" dri 
platforms")

Signed-off-by: Jon Turney 

Reviewed-by: Emil Velikov 

Jon, when you have some time, can you please write a Travis config for 
Cygwin.

This way we can do some basic checks a bit easier.


Yeah, that would be a good idea.

I've posted an updated version of my patchset to add a cygwin build for 
appveyor.


I've now pushed this.


Doing this on travis will be challenging, especially to run the tests.


If/when we move to gitlab CI we can simply copy it over ;-)


(It is apparently now possible to use appveyor as a CI provider for a 
self-hosted gitlab instance (see [1]), but I've no idea how to set that up)


[1] https://github.com/appveyor/ci/issues/711


This turns out to be straightforward, if you want to set this up on a 
personal gitlab repo:


1. Log into gitlab web interface
2. Settings/Access Tokens.  Create a token for appveyor to use.
3. Log into appveyor web interface
4. '+ New project', choose 'GitLab EE'. Provide base URL 
'https://gitlab.freedesktop.org' and the access token from above. Choose 
mesa from the list of projects.

5. Done

This installs suitable webhooks in gitlab to notify appveyor of pushes 
and MRs, and appveyor uses the gitlab commit status API to report the 
result, which appears as an 'external' pipeline in gitlab.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: correct typo in GLSL compilation error message

2019-01-07 Thread Erik Faye-Lund
On Mon, 2019-01-07 at 15:50 +0200, Andres Gomez wrote:
> Correct a typo introduced by
> 037f68d81e1 ("glsl: apply align layout qualifier rules to block
> offsets")
> 
> Cc: Timothy Arceri 
> Signed-off-by: Andres Gomez 

This should probably have this tag:

Fixes: 037f68d81e1 "glsl: apply align layout qualifier rules to block
offsets"

With that added:

Reviewed-by: Erik Faye-Lund 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] docs: complete the calendar and release schedule documentation

2019-01-07 Thread Andres Gomez
As suggested by Emil Velikov.

Cc: Dylan Baker 
Cc: Juan A. Suarez 
Cc: Emil Velikov 
Signed-off-by: Andres Gomez 
---
 docs/release-calendar.html | 10 ++
 docs/releasing.html| 14 --
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/docs/release-calendar.html b/docs/release-calendar.html
index afef899b0e0..3f4e1e9d8b9 100644
--- a/docs/release-calendar.html
+++ b/docs/release-calendar.html
@@ -23,6 +23,16 @@ Mesa provides feature/development and stable releases.
 The table below lists the date and release manager that is expected to do the
 specific release.
 
+Regular updates will ensure that the schedule for the current and the
+next two feature releases are shown in the table.
+
+In order to keep the whole releasing team up to date with the tools
+used, best practices and other details, the member in charge of the
+next feature release will be in constant rotation.
+
+The way the release schedule works is
+explained here.
+
 Take a look here
 if you'd like to nominate a patch in the next stable release.
 
diff --git a/docs/releasing.html b/docs/releasing.html
index e4c770f9186..851bbf58670 100644
--- a/docs/releasing.html
+++ b/docs/releasing.html
@@ -56,9 +56,10 @@ For example:
 
 
 Releases should happen on Wednesdays. Delays can occur although those
-should be keep to a minimum.
+should be kept to a minimum.
 
-See our calendar for the
+See our calendar
+for information about how the release schedule is planned, and the
 date and other details for individual releases.
 
 
@@ -67,6 +68,9 @@ date and other details for individual releases.
 Available approximately every three months.
 Initial timeplan available 2-4 weeks before the planned branchpoint (rc1)
 on the mesa-announce@ mailing list.
+Typically, the final release will happen after 4
+candidates. Additional ones may be needed in order to resolve blocking
+regressions, though.
 A pre-release announcement should be available
 approximately 24 hours before the final (non-rc) release.
 
@@ -84,6 +88,12 @@ Note: There is one or two releases overlap when changing 
branches. For example:
 
 The final release from the 12.0 series Mesa 12.0.5 will be out around the same
 time (or shortly after) 13.0.1 is out.
+
+This also involves that, as a final release may be delayed due to the
+need of additional candidates to solve some blocking regression(s),
+the release manager might have to update
+the calendar with
+additional bug fix releases of the current stable branch.
 
 
 
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/android: handle storage images in vkGetSwapchainGrallocUsageANDROID

2019-01-07 Thread Bas Nieuwenhuizen
On Wed, Dec 5, 2018 at 1:05 PM Tapani Pälli  wrote:
>
>
>
> On 12/5/18 2:00 PM, Bas Nieuwenhuizen wrote:
> > On Wed, Dec 5, 2018 at 12:51 PM Tapani Pälli  wrote:
> >>
> >>
> >>
> >> On 12/5/18 1:44 PM, Bas Nieuwenhuizen wrote:
> >>> On Wed, Dec 5, 2018 at 12:37 PM Tapani Pälli  
> >>> wrote:
> 
> 
> 
>  On 12/5/18 1:22 PM, Bas Nieuwenhuizen wrote:
> > On Wed, Dec 5, 2018 at 12:15 PM Tapani Pälli  
> > wrote:
> >>
> >>
> >>
> >> On 12/5/18 1:01 PM, Bas Nieuwenhuizen wrote:
> >>> On Fri, Sep 7, 2018 at 12:54 AM Kevin Strasser 
> >>>  wrote:
> 
>  Android P and earlier expect that the surface supports storage 
>  images, and
>  so many of the tests fail when the framework checks for that 
>  support. The
>  framework also includes various image format and usage combinations 
>  that are
>  invalid for the hardware.
> 
>  Drop the STORAGE restriction from the HAL and whitelist a pair of
>  formats so that existing versions of Android can pass these tests.
> 
>  Fixes:
> dEQP-VK.wsi.android.*
> 
>  Signed-off-by: Kevin Strasser 
>  ---
>   src/intel/vulkan/anv_android.c | 23 ++-
>   1 file changed, 14 insertions(+), 9 deletions(-)
> 
>  diff --git a/src/intel/vulkan/anv_android.c 
>  b/src/intel/vulkan/anv_android.c
>  index 46c41d5..e2640b8 100644
>  --- a/src/intel/vulkan/anv_android.c
>  +++ b/src/intel/vulkan/anv_android.c
>  @@ -234,7 +234,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
>  *grallocUsage = 0;
>  intel_logd("%s: format=%d, usage=0x%x", __func__, format, 
>  imageUsage);
> 
>  -   /* WARNING: Android Nougat's libvulkan.so hardcodes the 
>  VkImageUsageFlags
>  +   /* WARNING: Android's libvulkan.so hardcodes the 
>  VkImageUsageFlags
>   * returned to applications via 
>  VkSurfaceCapabilitiesKHR::supportedUsageFlags.
>   * The relevant code in libvulkan/swapchain.cpp contains 
>  this fun comment:
>   *
>  @@ -247,7 +247,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
>   * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
>   */
> 
>  -   const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
>  +   VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
> >>>
> >>> Why remove the const here?
> >>>
> .sType = 
>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
> .format = format,
> .type = VK_IMAGE_TYPE_2D,
>  @@ -255,6 +255,17 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
> .usage = imageUsage,
>  };
> 
>  +   /* Android P and earlier doesn't check if the physical device 
>  supports a
>  +* given format and usage combination before calling this 
>  function. Omit the
>  +* storage requirement to make the tests pass.
>  +*/
>  +#if ANDROID_API_LEVEL <= 28
>  +   if (format == VK_FORMAT_R8G8B8A8_SRGB ||
>  +   format == VK_FORMAT_R5G6B5_UNORM_PACK16) {
>  +  image_format_info.usage &= ~VK_IMAGE_USAGE_STORAGE_BIT;
>  +   }
>  +#endif
> >>>
> >>> I don't think you need this. Per the Vulkan spec you can only use a
> >>> format + usage combination for a swapchain if it is supported per
> >>> ImageFormatProperties, using essentially the same check happening
> >>> above. I know CTS has been bad at this, but Vulkan CTS should have
> >>> been fixed for a bit now. (I don't think all the fixes are in Android
> >>> CTS 9.0_r4 yet, maybe the next release?)
> >>
> >> AFAIK the problem here is not about CTS. It's the swapchain
> >> implementation that always requires storage support.
> >
> > Actually swapchain creation has the following valid usage rule:
> >
> > "The implied image creation parameters of the swapchain must be
> > supported as reported by vkGetPhysicalDeviceImageFormatProperties"
> >
> > So since those formats don't support the STORAGE usage bit, that test
> > fails and you are not allowed to create a swapchain with those formats
> > and storage, even if the surface capabilities expose the STORAGE
> > usage bit in general.
> 
>  Right ... this stuff was done because comment in the swapchain setting
>  the bits seems like maybe it's not thought through:
> 
>  // TODO(jessehall): I think these are right, but haven't thought hard 
>  about
>  // it. Do we need to query the driver for support of any of these?
> >>>
> 

Re: [Mesa-dev] [PATCH] anv/android: handle storage images in vkGetSwapchainGrallocUsageANDROID

2019-01-07 Thread Bas Nieuwenhuizen
On Mon, Jan 7, 2019 at 11:54 AM Tapani Pälli  wrote:
>
>
>
> On 1/7/19 11:56 AM, Bas Nieuwenhuizen wrote:
> > On Wed, Dec 5, 2018 at 1:05 PM Tapani Pälli  wrote:
> >>
> >>
> >>
> >> On 12/5/18 2:00 PM, Bas Nieuwenhuizen wrote:
> >>> On Wed, Dec 5, 2018 at 12:51 PM Tapani Pälli  
> >>> wrote:
> 
> 
> 
>  On 12/5/18 1:44 PM, Bas Nieuwenhuizen wrote:
> > On Wed, Dec 5, 2018 at 12:37 PM Tapani Pälli  
> > wrote:
> >>
> >>
> >>
> >> On 12/5/18 1:22 PM, Bas Nieuwenhuizen wrote:
> >>> On Wed, Dec 5, 2018 at 12:15 PM Tapani Pälli  
> >>> wrote:
> 
> 
> 
>  On 12/5/18 1:01 PM, Bas Nieuwenhuizen wrote:
> > On Fri, Sep 7, 2018 at 12:54 AM Kevin Strasser 
> >  wrote:
> >>
> >> Android P and earlier expect that the surface supports storage 
> >> images, and
> >> so many of the tests fail when the framework checks for that 
> >> support. The
> >> framework also includes various image format and usage 
> >> combinations that are
> >> invalid for the hardware.
> >>
> >> Drop the STORAGE restriction from the HAL and whitelist a pair of
> >> formats so that existing versions of Android can pass these tests.
> >>
> >> Fixes:
> >> dEQP-VK.wsi.android.*
> >>
> >> Signed-off-by: Kevin Strasser 
> >> ---
> >>   src/intel/vulkan/anv_android.c | 23 ++-
> >>   1 file changed, 14 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/src/intel/vulkan/anv_android.c 
> >> b/src/intel/vulkan/anv_android.c
> >> index 46c41d5..e2640b8 100644
> >> --- a/src/intel/vulkan/anv_android.c
> >> +++ b/src/intel/vulkan/anv_android.c
> >> @@ -234,7 +234,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
> >>  *grallocUsage = 0;
> >>  intel_logd("%s: format=%d, usage=0x%x", __func__, format, 
> >> imageUsage);
> >>
> >> -   /* WARNING: Android Nougat's libvulkan.so hardcodes the 
> >> VkImageUsageFlags
> >> +   /* WARNING: Android's libvulkan.so hardcodes the 
> >> VkImageUsageFlags
> >>   * returned to applications via 
> >> VkSurfaceCapabilitiesKHR::supportedUsageFlags.
> >>   * The relevant code in libvulkan/swapchain.cpp contains 
> >> this fun comment:
> >>   *
> >> @@ -247,7 +247,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
> >>   * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
> >>   */
> >>
> >> -   const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
> >> +   VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
> >
> > Why remove the const here?
> >
> >> .sType = 
> >> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
> >> .format = format,
> >> .type = VK_IMAGE_TYPE_2D,
> >> @@ -255,6 +255,17 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
> >> .usage = imageUsage,
> >>  };
> >>
> >> +   /* Android P and earlier doesn't check if the physical device 
> >> supports a
> >> +* given format and usage combination before calling this 
> >> function. Omit the
> >> +* storage requirement to make the tests pass.
> >> +*/
> >> +#if ANDROID_API_LEVEL <= 28
> >> +   if (format == VK_FORMAT_R8G8B8A8_SRGB ||
> >> +   format == VK_FORMAT_R5G6B5_UNORM_PACK16) {
> >> +  image_format_info.usage &= ~VK_IMAGE_USAGE_STORAGE_BIT;
> >> +   }
> >> +#endif
> >
> > I don't think you need this. Per the vulkan spec you can only use an
> > format + usage combination for a swapchain if it is supported per
> > ImageFormatProperties, using essentially the same check happening
> > above. I know CTs has been bad at this, but Vulkan CTS should have
> > been fixed for a bit now. (I don't think all the fixes are in 
> > Android
> > CTS 9.0_r4 yet, maybe the next release?)
> 
>  AFAIK the problem here is not about CTS. It's the swapchain
>  implementation that always requires storage support.
> >>>
> >>> Actually swapchain creation has the following valid usage rule:
> >>>
> >>> "The implied image creation parameters of the swapchain must be
> >>> supported as reported by vkGetPhysicalDeviceImageFormatProperties"
> >>>
> >>> So since those formats don't support the STORAGE usage bit, that test
> >>> fails and you are not allowed to create a swapchain with those formats
> >>> and storage, even if the surface capabiliities 

Re: [Mesa-dev] [PATCHv2 07/10] intel/fs: Introduce regioning lowering pass.

2019-01-07 Thread Iago Toral
On Sat, 2019-01-05 at 14:03 -0800, Francisco Jerez wrote:
> This legalization pass is meant to handle situations where the source
> or destination regioning controls of an instruction are unsupported
> by
> the hardware and need to be lowered away into separate instructions.
> This should be more reliable and future-proof than the current
> approach of handling CHV/BXT restrictions manually all over the
> visitor.  The same mechanism is leveraged to lower unsupported type
> conversions easily, which obsoletes the lower_conversions pass.
> 
> v2: Give conditional modifiers the same treatment as predicates for
> SEL instructions in lower_dst_modifiers() (Iago).  Special-case a
> couple of other instructions with inconsistent conditional mod
> semantics in lower_dst_modifiers() (Curro).
> ---
>  src/intel/Makefile.sources|   1 +
>  src/intel/compiler/brw_fs.cpp |   5 +-
>  src/intel/compiler/brw_fs.h   |  21 +-
>  src/intel/compiler/brw_fs_lower_regioning.cpp | 399
> ++
>  src/intel/compiler/brw_ir_fs.h|  10 +
>  src/intel/compiler/meson.build|   1 +
>  6 files changed, 418 insertions(+), 19 deletions(-)
>  create mode 100644 src/intel/compiler/brw_fs_lower_regioning.cpp
> 
> diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
> index 5e7d32293b7..6b9874d2b80 100644
> --- a/src/intel/Makefile.sources
> +++ b/src/intel/Makefile.sources
> @@ -64,6 +64,7 @@ COMPILER_FILES = \
>   compiler/brw_fs_live_variables.h \
>   compiler/brw_fs_lower_conversions.cpp \
>   compiler/brw_fs_lower_pack.cpp \
> + compiler/brw_fs_lower_regioning.cpp \
>   compiler/brw_fs_nir.cpp \
>   compiler/brw_fs_reg_allocate.cpp \
>   compiler/brw_fs_register_coalesce.cpp \
> diff --git a/src/intel/compiler/brw_fs.cpp
> b/src/intel/compiler/brw_fs.cpp
> index 889509badab..caa7a798332 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -6471,7 +6471,10 @@ fs_visitor::optimize()
>OPT(dead_code_eliminate);
> }
>  
> -   if (OPT(lower_conversions)) {
> +   progress = false;
> +   OPT(lower_conversions);
> +   OPT(lower_regioning);
> +   if (progress) {

This is a small nitpick but since this makes lower_conversions
redundant, maybe it makes more sense to just remove the call to it here
already in this patch so you don't have to reset the progress variable
and simply do:

if (OPT(lower_regioning)) {
   ...
}

>OPT(opt_copy_propagation);
>OPT(dead_code_eliminate);
>OPT(lower_simd_width);
> diff --git a/src/intel/compiler/brw_fs.h
> b/src/intel/compiler/brw_fs.h
> index dc36ecc21ac..36825754931 100644
> --- a/src/intel/compiler/brw_fs.h
> +++ b/src/intel/compiler/brw_fs.h
> @@ -164,6 +164,7 @@ public:
> void lower_uniform_pull_constant_loads();
> bool lower_load_payload();
> bool lower_pack();
> +   bool lower_regioning();
> bool lower_conversions();
> bool lower_logical_sends();
> bool lower_integer_multiplication();
> @@ -536,24 +537,8 @@ namespace brw {
>}
> }
>  
> -   /**
> -* Remove any modifiers from the \p i-th source region of the
> instruction,
> -* including negate, abs and any implicit type conversion to the
> execution
> -* type.  Instead any source modifiers will be implemented as a
> separate
> -* MOV instruction prior to the original instruction.
> -*/
> -   inline bool
> -   lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst
> *inst, unsigned i)
> -   {
> -  assert(inst->components_read(i) == 1);
> -  const fs_builder ibld(v, block, inst);
> -  const fs_reg tmp = ibld.vgrf(get_exec_type(inst));
> -
> -  ibld.MOV(tmp, inst->src[i]);
> -  inst->src[i] = tmp;
> -
> -  return true;
> -   }
> +   bool
> +   lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst
> *inst, unsigned i);
>  }
>  
>  void shuffle_from_32bit_read(const brw::fs_builder ,
> diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp
> b/src/intel/compiler/brw_fs_lower_regioning.cpp
> new file mode 100644
> index 000..d7c97e1442a
> --- /dev/null
> +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp
> @@ -0,0 +1,399 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> the
> + * Software is furnished to do so, subject to the following
> conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the next
> + * paragraph) shall be included in all copies or substantial
> portions of the
> + * Software.
> + *
> + * 

[Mesa-dev] [PATCH] glsl: correct typo in GLSL compilation error message

2019-01-07 Thread Andres Gomez
Correct a typo introduced by
037f68d81e1 ("glsl: apply align layout qualifier rules to block offsets")

Cc: Timothy Arceri 
Signed-off-by: Andres Gomez 
---
 src/compiler/glsl/ast_to_hir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 9199230a7af..8fdc1890ab0 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -7417,7 +7417,7 @@ ast_process_struct_or_iface_block_members(exec_list 
*instructions,
   if (member_align == 0 ||
   member_align & (member_align - 1)) {
  _mesa_glsl_error(, state, "align layout qualifier "
-  "in not a power of 2");
+  "is not a power of 2");
   } else {
  fields[i].offset = glsl_align(offset, member_align);
  next_offset = glsl_align(fields[i].offset + size, align);
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl/linker: specify proper direction in location aliasing error

2019-01-07 Thread Andres Gomez
The check for location aliasing was always assuming output variables
but this validation is also called for input variables.

Fixes: e2abb75b0e4 ("glsl/linker: validate explicit locations for SSO programs")
Cc: Iago Toral Quiroga 
Signed-off-by: Andres Gomez 
---
 src/compiler/glsl/link_varyings.cpp | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 52e493cb599..3969c0120b3 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -481,9 +481,10 @@ check_location_aliasing(struct explicit_location_info 
explicit_locations[][4],
 /* Component aliasing is not alloed */
 if (comp >= component && comp < last_comp) {
linker_error(prog,
-"%s shader has multiple outputs explicitly "
+"%s shader has multiple %sputs explicitly "
 "assigned to location %d and component %d\n",
 _mesa_shader_stage_to_string(stage),
+var->data.mode == ir_var_shader_in ? "in" : "out",
 location, comp);
return false;
 } else {
@@ -502,10 +503,12 @@ check_location_aliasing(struct explicit_location_info 
explicit_locations[][4],
 
if (info->interpolation != interpolation) {
   linker_error(prog,
-   "%s shader has multiple outputs at explicit "
+   "%s shader has multiple %sputs at explicit "
"location %u with different interpolation "
"settings\n",
-   _mesa_shader_stage_to_string(stage), location);
+   _mesa_shader_stage_to_string(stage),
+   var->data.mode == ir_var_shader_in ?
+   "in" : "out", location);
   return false;
}
 
@@ -513,9 +516,11 @@ check_location_aliasing(struct explicit_location_info 
explicit_locations[][4],
info->sample != sample ||
info->patch != patch) {
   linker_error(prog,
-   "%s shader has multiple outputs at explicit "
+   "%s shader has multiple %sputs at explicit "
"location %u with different aux storage\n",
-   _mesa_shader_stage_to_string(stage), location);
+   _mesa_shader_stage_to_string(stage),
+   var->data.mode == ir_var_shader_in ?
+   "in" : "out", location);
   return false;
}
 }
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] editorconfig: Add max_line_length property

2019-01-07 Thread Andres Gomez
The property is supported by most editors, but not all:
https://github.com/editorconfig/editorconfig/wiki/EditorConfig-Properties#max_line_length

Cc: Eric Engestrom 
Cc: Eric Anholt 
Signed-off-by: Andres Gomez 
---
 .editorconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.editorconfig b/.editorconfig
index 06848f68c08..c2b772da6c1 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -11,6 +11,7 @@ tab_width = 8
 [*.{c,h,cpp,hpp,cc,hh}]
 indent_style = space
 indent_size = 3
+max_line_length = 78
 
 [{Makefile*,*.mk}]
 indent_style = tab
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/android: handle storage images in vkGetSwapchainGrallocUsageANDROID

2019-01-07 Thread Tapani Pälli



On 1/7/19 1:28 PM, Bas Nieuwenhuizen wrote:

On Mon, Jan 7, 2019 at 11:54 AM Tapani Pälli  wrote:




On 1/7/19 11:56 AM, Bas Nieuwenhuizen wrote:

On Wed, Dec 5, 2018 at 1:05 PM Tapani Pälli  wrote:




On 12/5/18 2:00 PM, Bas Nieuwenhuizen wrote:

On Wed, Dec 5, 2018 at 12:51 PM Tapani Pälli  wrote:




On 12/5/18 1:44 PM, Bas Nieuwenhuizen wrote:

On Wed, Dec 5, 2018 at 12:37 PM Tapani Pälli  wrote:




On 12/5/18 1:22 PM, Bas Nieuwenhuizen wrote:

On Wed, Dec 5, 2018 at 12:15 PM Tapani Pälli  wrote:




On 12/5/18 1:01 PM, Bas Nieuwenhuizen wrote:

On Fri, Sep 7, 2018 at 12:54 AM Kevin Strasser  wrote:


Android P and earlier expect that the surface supports storage images, and
so many of the tests fail when the framework checks for that support. The
framework also includes various image format and usage combinations that are
invalid for the hardware.

Drop the STORAGE restriction from the HAL and whitelist a pair of
formats so that existing versions of Android can pass these tests.

Fixes:
 dEQP-VK.wsi.android.*

Signed-off-by: Kevin Strasser 
---
   src/intel/vulkan/anv_android.c | 23 ++-
   1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c
index 46c41d5..e2640b8 100644
--- a/src/intel/vulkan/anv_android.c
+++ b/src/intel/vulkan/anv_android.c
@@ -234,7 +234,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
  *grallocUsage = 0;
  intel_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);

-   /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
+   /* WARNING: Android's libvulkan.so hardcodes the VkImageUsageFlags
   * returned to applications via 
VkSurfaceCapabilitiesKHR::supportedUsageFlags.
   * The relevant code in libvulkan/swapchain.cpp contains this fun 
comment:
   *
@@ -247,7 +247,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
   * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
   */

-   const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
+   VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {


Why remove the const here?


 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
 .format = format,
 .type = VK_IMAGE_TYPE_2D,
@@ -255,6 +255,17 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
 .usage = imageUsage,
  };

+   /* Android P and earlier doesn't check if the physical device supports a
+* given format and usage combination before calling this function. Omit the
+* storage requirement to make the tests pass.
+*/
+#if ANDROID_API_LEVEL <= 28
+   if (format == VK_FORMAT_R8G8B8A8_SRGB ||
+   format == VK_FORMAT_R5G6B5_UNORM_PACK16) {
+  image_format_info.usage &= ~VK_IMAGE_USAGE_STORAGE_BIT;
+   }
+#endif


I don't think you need this. Per the Vulkan spec you can only use a
format + usage combination for a swapchain if it is supported per
ImageFormatProperties, using essentially the same check happening
above. I know CTS has been bad at this, but Vulkan CTS should have
been fixed for a bit now. (I don't think all the fixes are in Android
CTS 9.0_r4 yet, maybe the next release?)


AFAIK the problem here is not about CTS. It's the swapchain
implementation that always requires storage support.


Actually swapchain creation has the following valid usage rule:

"The implied image creation parameters of the swapchain must be
supported as reported by vkGetPhysicalDeviceImageFormatProperties"

So since those formats don't support the STORAGE usage bit, that test
fails and you are not allowed to create a swapchain with those formats
and storage, even if the surface capabilities expose the STORAGE
usage bit in general.


Right ... this stuff was done because the comment in the swapchain code
setting the bits suggests that maybe it was not thought through:

// TODO(jessehall): I think these are right, but haven't thought hard about
// it. Do we need to query the driver for support of any of these?


That was from before the spec was changed to add that rule.


OK, if I understand correctly, should we instead try to fix those
tests so that they skip rather than fail?


They should be fixed with:
https://github.com/KhronosGroup/VK-GL-CTS/commit/49eab80e4a8b3af1790b9ac88b096aa9bffd193f#diff-8369d6640a2c6ad0c0fc1d85b113faeb
https://github.com/KhronosGroup/VK-GL-CTS/commit/858f5396a4f63223fcf31f717d23b4b552e10182#diff-8369d6640a2c6ad0c0fc1d85b113faeb


Thanks, will try with these!


Hi,

Did you have any luck with this? This patch (or mine) are still
pending review based on this?


Sorry, I had forgotten about this, but I will get to it now. Could you
please point out which of your patches was referred to here?


https://patchwork.freedesktop.org/patch/265974/

(Though it is missing a bit: see
https://chromium-review.googlesource.com/c/chromiumos/third_party/mesa/+/1366537
for what I ended up using in ChromeOS)



Re: [Mesa-dev] [PATCH] radv: skip draws with instance_count == 0

2019-01-07 Thread Bas Nieuwenhuizen
On Mon, Jan 7, 2019 at 2:45 PM Samuel Pitoiset
 wrote:
>
> Loosely based on RadeonSI.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 14 ++
>  1 file changed, 14 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 1966098e08c..8e3f1a7f5f0 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -3697,6 +3697,20 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
> radeon_check_space(cmd_buffer->device->ws,
>cmd_buffer->cs, 4096);
>
> +   if (likely(!info->indirect)) {
> +   /* SI-CI treat instance_count==0 as instance_count==1. There 
> is
> +* no workaround for indirect draws, but we can at least skip
> +* direct draws.
> +*/
> +   if (unlikely(!info->instance_count))
> +   return;
> +
> +   /* Handle count == 0. */
> +   if (unlikely(!info->count &&
> +(info->indexed || !info->strmout_buffer)))

Why the extra condition of (info->indexed || !info->strmout_buffer)?

> +   return;
> +   }
> +
> /* Use optimal packet order based on whether we need to sync the
>  * pipeline.
>  */
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] etnaviv_screen: Fix typo in cflush_all description

2019-01-07 Thread Christian Gmeiner
Am Mo., 7. Jan. 2019 um 15:00 Uhr schrieb Guido Günther :
>
> Signed-off-by: Guido Günther 

Reviewed-by: Christian Gmeiner 

> ---
>  src/gallium/drivers/etnaviv/etnaviv_screen.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
> b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> index fb51aa5f4e8..fd320232528 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> @@ -63,7 +63,7 @@ static const struct debug_named_value debug_options[] = {
> {"no_autodisable", ETNA_DBG_NO_AUTODISABLE, "Disable autodisable"},
> {"no_supertile",   ETNA_DBG_NO_SUPERTILE, "Disable supertiles"},
> {"no_early_z", ETNA_DBG_NO_EARLY_Z, "Disable early z"},
> -   {"cflush_all", ETNA_DBG_CFLUSH_ALL, "Flush every cash before state 
> update"},
> +   {"cflush_all", ETNA_DBG_CFLUSH_ALL, "Flush every cache before state 
> update"},
> {"msaa2x", ETNA_DBG_MSAA_2X, "Force 2x msaa"},
> {"msaa4x", ETNA_DBG_MSAA_4X, "Force 4x msaa"},
> {"flush_all",  ETNA_DBG_FLUSH_ALL, "Flush after every rendered 
> primitive"},
> --
> 2.19.2
> ___
> etnaviv mailing list
> etna...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/etnaviv



-- 
greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/10] intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass.

2019-01-07 Thread Iago Toral
On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
> Currently the visitor attempts to enforce the regioning restrictions
> that apply to double-precision instructions on CHV/BXT at NIR-to-i965
> translation time.  It is possible though for the copy propagation
> pass
> to violate this restriction if a strided move is propagated into one
> of the affected instructions.  I've only reproduced this issue on a
> future platform but it could affect CHV/BXT too under the right
> conditions.
> 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  .../compiler/brw_fs_copy_propagation.cpp  | 10 +++
>  src/intel/compiler/brw_ir_fs.h| 28
> +++
>  2 files changed, 38 insertions(+)
> 
> diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp
> b/src/intel/compiler/brw_fs_copy_propagation.cpp
> index a8ec1c34630..c23ce1ef426 100644
> --- a/src/intel/compiler/brw_fs_copy_propagation.cpp
> +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
> @@ -315,6 +315,16 @@ can_take_stride(fs_inst *inst, unsigned arg,
> unsigned stride,
> if (stride > 4)
>return false;
>  
> +   /* Bail if the channels of the source need to be aligned to the
> byte offset
> +* of the corresponding channel of the destination, and the
> provided stride
> +* would break this restriction.
> +*/
> +   if (has_dst_aligned_region_restriction(devinfo, inst) &&
> +   !(type_sz(inst->src[arg].type) * stride ==
> +   type_sz(inst->dst.type) * inst->dst.stride ||
> + stride == 0))
> +  return false;
> +
> /* 3-source instructions can only be Align16, which restricts
> what strides
>  * they can take. They can only take a stride of 1 (the usual
> case), or 0
>  * with a special "repctrl" bit. But the repctrl bit doesn't work
> for
> diff --git a/src/intel/compiler/brw_ir_fs.h
> b/src/intel/compiler/brw_ir_fs.h
> index 07e7224e0f8..95b069a2e02 100644
> --- a/src/intel/compiler/brw_ir_fs.h
> +++ b/src/intel/compiler/brw_ir_fs.h
> @@ -486,4 +486,32 @@ get_exec_type_size(const fs_inst *inst)
> return type_sz(get_exec_type(inst));
>  }
>  
> +/**
> + * Return whether the following regioning restriction applies to the
> specified
> + * instruction.  From the Cherryview PRM Vol 7. "Register Region
> + * Restrictions":
> + *
> + * "When source or destination datatype is 64b or operation is
> integer DWord
> + *  multiply, regioning in Align1 must follow these rules:
> + *
> + *  1. Source and Destination horizontal stride must be aligned to
> the same qword.
> + *  2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
> + *  3. Source and Destination offset must be the same, except the
> case of
> + * scalar source."
> + */
> +static inline bool
> +has_dst_aligned_region_restriction(const gen_device_info *devinfo,
> +   const fs_inst *inst)
> +{
> +   const brw_reg_type exec_type = get_exec_type(inst);
> +   const bool is_int_multiply =
> !brw_reg_type_is_floating_point(exec_type) &&
> + (inst->opcode == BRW_OPCODE_MUL || inst->opcode ==
> BRW_OPCODE_MAD);

Should this be extended to include MAC and MACH too?

> +
> +   if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 ||
> +   (type_sz(exec_type) == 4 && is_int_multiply))
> +  return devinfo->is_cherryview ||
> gen_device_info_is_9lp(devinfo);

How about:

if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
   ...
} else {
   return false;
}

since we only really need to do these checks on those platforms, it
might make a bit more sense to do it this way.

> +   else
> +  return false;
> +}
> +
>  #endif

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 0/6] Enable fp16 visuals and fbconfigs

2019-01-07 Thread Tapani Pälli



On 1/7/19 8:44 AM, Tapani Pälli wrote:

Hi;

On 1/4/19 11:56 PM, Kevin Strasser wrote:
This series enables fp16 fbconfigs and visuals by leveraging existing 
off-screen

rendering support.

These formats can be used in conjunction with 
EXT_surface_SMPTE2086_metadata

(not yet implemented by any drivers) to support EXT_gl_colorspace_scrgb /
EXT_gl_colorspace_scrgb_linear, used in places like Android wide color 
gamut.


While I have run this series against Piglit, I still need to sort out 
test
coverage for these formats. If anyone has pointers to existing tests 
that would

be really helpful.


dEQP (EGL module) has a set of 'wide color' tests that also cover 1010102
and fp16.


Having said that, it's not really a 'complete test' but at least
something to start with. Does the kernel already support fp16?




As an easy smoke test I have a modified version of kmscube:
   https://github.com/strassek/kmscube/commits/fp16

Kevin Strasser (6):
   dri: Support 64 bit rgba masks
   dri: Set bit for float configs
   drm-uapi: Add fp16 formats to drm_fourcc.h
   dri: Enable fp16 configs and visuals
   gallium/winsys/kms: Respect format bpp
   gbm: Add visuals and buffer handling for fp16 formats

  include/GL/internal/dri_interface.h    | 11 ++-
  include/drm-uapi/drm_fourcc.h  |  8 +++
  src/egl/drivers/dri2/egl_dri2.c    | 32 +++--
  src/egl/drivers/dri2/egl_dri2.h    |  6 +-
  src/egl/drivers/dri2/platform_android.c    |  2 +-
  src/egl/drivers/dri2/platform_drm.c    | 79 
++

  src/egl/drivers/dri2/platform_surfaceless.c    |  2 +-
  src/egl/drivers/dri2/platform_wayland.c    |  2 +-
  src/egl/drivers/dri2/platform_x11.c    |  6 +-
  .../auxiliary/pipe-loader/driinfo_gallium.h    |  1 +
  src/gallium/state_trackers/dri/dri2.c  | 22 ++
  src/gallium/state_trackers/dri/dri_drawable.c  |  3 +
  src/gallium/state_trackers/dri/dri_screen.c    | 26 ++-
  src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c  |  2 +-
  src/gbm/backends/dri/gbm_dri.c | 38 ++-
  src/gbm/backends/dri/gbm_driint.h  |  9 +--
  src/gbm/main/gbm.c |  3 +
  src/gbm/main/gbm.h |  9 +++
  src/glx/glxconfig.h    |  2 +-
  src/loader/loader_dri3_helper.c    |  5 ++
  src/mesa/drivers/dri/common/dri_util.c |  8 +++
  src/mesa/drivers/dri/common/utils.c    | 31 -
  src/mesa/drivers/dri/i965/intel_screen.c   | 39 ++-
  src/mesa/main/mtypes.h |  2 +-
  src/mesa/state_tracker/st_cb_fbo.c |  3 +
  src/util/xmlpool/t_options.h   |  5 ++
  26 files changed, 311 insertions(+), 45 deletions(-)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NIR documentation (was: NIR constant problem for GPU which doesn't have native integer support)

2019-01-07 Thread Eero Tamminen

Hi,

On 7.1.2019 12.42, Erik Faye-Lund wrote:

On Fri, 2019-01-04 at 09:40 -0600, Jason Ekstrand wrote:

[...]

Yeah...  Patches welcome?  There have been many attempts by Connor
and myself to better document NIR.  They all end up in /dev/null due
to EBIGGERFIRES. :-(

That said, if you ever want to know how something works, I'm logged
into IRC 24/7 and will happily answer questions.



That's understandable, but even so, at some point we need to reduce
some bus-factor. Somehow.

Do you have some links to the attempts at documenting NIR? Perhaps I
could take a look at it during some down-time?


Google search gave me:
http://www.jlekstrand.net/jason/projects/mesa/nir-notes/
https://people.freedesktop.org/~cwabbott0/nir-docs/

There are also some Igalia blog posts:

https://blogs.igalia.com/apinheiro/2016/06/02/introducing-mesa-intermediate-representations-on-intel-drivers-with-a-practical-example/

I assume you've already found them.  If they're still up to date, it
might be good to also add links to them on the Mesa site.  At least I
didn't notice them being referenced there yet:

https://www.mesa3d.org/

(And they aren't referenced in any of the files in Mesa git.)


- Eero
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/android: handle storage images in vkGetSwapchainGrallocUsageANDROID

2019-01-07 Thread Bas Nieuwenhuizen
On Mon, Jan 7, 2019 at 1:23 PM Tapani Pälli  wrote:
>
>
>
> On 1/7/19 1:28 PM, Bas Nieuwenhuizen wrote:
> > On Mon, Jan 7, 2019 at 11:54 AM Tapani Pälli  wrote:
> >>
> >>
> >>
> >> On 1/7/19 11:56 AM, Bas Nieuwenhuizen wrote:
> >>> On Wed, Dec 5, 2018 at 1:05 PM Tapani Pälli  
> >>> wrote:
> 
> 
> 
>  On 12/5/18 2:00 PM, Bas Nieuwenhuizen wrote:
> > On Wed, Dec 5, 2018 at 12:51 PM Tapani Pälli  
> > wrote:
> >>
> >>
> >>
> >> On 12/5/18 1:44 PM, Bas Nieuwenhuizen wrote:
> >>> On Wed, Dec 5, 2018 at 12:37 PM Tapani Pälli  
> >>> wrote:
> 
> 
> 
>  On 12/5/18 1:22 PM, Bas Nieuwenhuizen wrote:
> > On Wed, Dec 5, 2018 at 12:15 PM Tapani Pälli 
> >  wrote:
> >>
> >>
> >>
> >> On 12/5/18 1:01 PM, Bas Nieuwenhuizen wrote:
> >>> On Fri, Sep 7, 2018 at 12:54 AM Kevin Strasser 
> >>>  wrote:
> 
>  Android P and earlier expect that the surface supports storage 
>  images, and
>  so many of the tests fail when the framework checks for that 
>  support. The
>  framework also includes various image format and usage 
>  combinations that are
>  invalid for the hardware.
> 
>  Drop the STORAGE restriction from the HAL and whitelist a pair of
>  formats so that existing versions of Android can pass these 
>  tests.
> 
>  Fixes:
>   dEQP-VK.wsi.android.*
> 
>  Signed-off-by: Kevin Strasser 
>  ---
> src/intel/vulkan/anv_android.c | 23 
>  ++-
> 1 file changed, 14 insertions(+), 9 deletions(-)
> 
>  diff --git a/src/intel/vulkan/anv_android.c 
>  b/src/intel/vulkan/anv_android.c
>  index 46c41d5..e2640b8 100644
>  --- a/src/intel/vulkan/anv_android.c
>  +++ b/src/intel/vulkan/anv_android.c
>  @@ -234,7 +234,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
>    *grallocUsage = 0;
>    intel_logd("%s: format=%d, usage=0x%x", __func__, 
>  format, imageUsage);
> 
>  -   /* WARNING: Android Nougat's libvulkan.so hardcodes the 
>  VkImageUsageFlags
>  +   /* WARNING: Android's libvulkan.so hardcodes the 
>  VkImageUsageFlags
> * returned to applications via 
>  VkSurfaceCapabilitiesKHR::supportedUsageFlags.
> * The relevant code in libvulkan/swapchain.cpp 
>  contains this fun comment:
> *
>  @@ -247,7 +247,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
> * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
> */
> 
>  -   const VkPhysicalDeviceImageFormatInfo2KHR image_format_info 
>  = {
>  +   VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
> >>>
> >>> Why remove the const here?
> >>>
>   .sType = 
>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
>   .format = format,
>   .type = VK_IMAGE_TYPE_2D,
>  @@ -255,6 +255,17 @@ VkResult 
>  anv_GetSwapchainGrallocUsageANDROID(
>   .usage = imageUsage,
>    };
> 
>  +   /* Android P and earlier doesn't check if the physical 
>  device supports a
>  +* given format and usage combination before calling this 
>  function. Omit the
>  +* storage requirement to make the tests pass.
>  +*/
>  +#if ANDROID_API_LEVEL <= 28
>  +   if (format == VK_FORMAT_R8G8B8A8_SRGB ||
>  +   format == VK_FORMAT_R5G6B5_UNORM_PACK16) {
>  +  image_format_info.usage &= ~VK_IMAGE_USAGE_STORAGE_BIT;
>  +   }
>  +#endif
> >>>
> >>> I don't think you need this. Per the vulkan spec you can only use 
> >>> an
> >>> format + usage combination for a swapchain if it is supported per
> >>> ImageFormatProperties, using essentially the same check happening
> >>> above. I know CTs has been bad at this, but Vulkan CTS should have
> >>> been fixed for a bit now. (I don't think all the fixes are in 
> >>> Android
> >>> CTS 9.0_r4 yet, maybe the next release?)
> >>
> >> AFAIK the problem here is not about CTS. It's the swapchain
> >> implementation that always requires storage support.
> >
> > Actually swapchain 

Re: [Mesa-dev] [ANNOUNCE] mesa 18.3.1

2019-01-07 Thread Andres Gomez
Emil, the 18.3.2 should have already happened by the 19th of December.

Is there anything stopping you from going ahead with it?

I've also noticed that there is a 3-week gap (instead of 2) from
18.3.2 to 18.3.3. I suppose you decided on that because most people
are away for the Christmas and New Year holidays (?). I didn't notice
this change to the calendar since it landed without review.

Take into account that we can help keep the pace of the releases
constant and steady, which is what we want.

Please do raise your hand ASAP whenever you feel you won't have time
for a release. Juan, Dylan or I can step in and take over for just a
single release or from that moment on, if needed. I, for example, was
working during most of the last few weeks and could have helped with
18.3.2 and 18.3.3.

Anyway, do you need help making 18.3.2 happen this week?

Br.

On Tue, 2018-12-11 at 21:42 +, Emil Velikov wrote:
> Mesa 18.3.1 is now available.
> 
> This version disables the VK_EXT_pci_bus_info extension due to last
> minute issues spotted in the specification.
> 
> 
> Emil Velikov (3):
>   docs: add sha256 checksums for 18.3.0
>   Update version to 18.3.1
>   docs: add release notes for 18.3.1
> 
> Jason Ekstrand (1):
>   anv,radv: Disable VK_EXT_pci_bus_info
> 
> git tag: mesa-18.3.1
> 
> https://mesa.freedesktop.org/archive/mesa-18.3.1.tar.gz
> MD5:  2de82245518020872fee4c2f9a8c709b  mesa-18.3.1.tar.gz
> SHA1: 103cb6e8d52ea82ba30ecd546f4ca5c63ceef2e4  mesa-18.3.1.tar.gz
> SHA256: 256d0c3d88e380c1b8e3fc5c6ac34001e3b7c30458b8b852407ec68b8ccd9fda  
> mesa-18.3.1.tar.gz
> SHA512: 
> 16e5b52246bcb8c014b59bf7d0ad77b0e350bca212c2ee3e2b8a66bbed59d2f8e2a557f210ea45f98db988039ebb348cb69acf77505fb8e33b29da5efb5307de
>   mesa-18.3.1.tar.gz
> PGP:  https://mesa.freedesktop.org/archive/mesa-18.3.1.tar.gz.sig
> 
> https://mesa.freedesktop.org/archive/mesa-18.3.1.tar.xz
> MD5:  d60828056d77bfdbae0970f9b15fb1be  mesa-18.3.1.tar.xz
> SHA1: 50ba2d37647fea77ea19416e8a6ffed34c313330  mesa-18.3.1.tar.xz
> SHA256: 5b1f827d28684a25f6657289f8b7d47ac56395988c7ac23e0ec9a62b644bdc63  
> mesa-18.3.1.tar.xz
> SHA512: 
> a68d39158cf1e868d70730d0641a0cfe4c6e5b3cd1bc0c47f54022402aca03503933084f6ddc722bf88c9b6d1281ba5c847ec4fed8092a9b33f90527d08e12db
>   mesa-18.3.1.tar.xz
> PGP:  https://mesa.freedesktop.org/archive/mesa-18.3.1.tar.xz.sig
> 
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] etnaviv_screen: Fix typo in cflush_all description

2019-01-07 Thread Guido Günther
Signed-off-by: Guido Günther 
---
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index fb51aa5f4e8..fd320232528 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -63,7 +63,7 @@ static const struct debug_named_value debug_options[] = {
{"no_autodisable", ETNA_DBG_NO_AUTODISABLE, "Disable autodisable"},
{"no_supertile",   ETNA_DBG_NO_SUPERTILE, "Disable supertiles"},
{"no_early_z", ETNA_DBG_NO_EARLY_Z, "Disable early z"},
-   {"cflush_all", ETNA_DBG_CFLUSH_ALL, "Flush every cash before state 
update"},
+   {"cflush_all", ETNA_DBG_CFLUSH_ALL, "Flush every cache before state 
update"},
{"msaa2x", ETNA_DBG_MSAA_2X, "Force 2x msaa"},
{"msaa4x", ETNA_DBG_MSAA_4X, "Force 4x msaa"},
{"flush_all",  ETNA_DBG_FLUSH_ALL, "Flush after every rendered 
primitive"},
-- 
2.19.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split.

2019-01-07 Thread Iago Toral
On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
> This seems to be a problem in combination with the lower_regioning
> pass introduced by a future commit, which can modify a SIMD-split
> instruction causing its execution size to become illegal again.  A
> subsequent call to lower_simd_width() would hit this bug on a future
> platform.
> 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/intel/compiler/brw_fs.cpp | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs.cpp
> b/src/intel/compiler/brw_fs.cpp
> index 97544fdf465..4aacc72a1b7 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -5666,7 +5666,7 @@ static fs_reg
>  emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
>  {
> /* Specified channel group from the source region. */
> -   const fs_reg src = horiz_offset(inst->src[i], lbld.group());
> +   const fs_reg src = horiz_offset(inst->src[i], lbld.group() -
> inst->group);

Should we assert that lbld.group >= inst->group? Same below.

> if (needs_src_copy(lbld, inst, i)) {
>/* Builder of the right width to perform the copy avoiding
> uninitialized
> @@ -5757,7 +5757,7 @@ emit_zip(const fs_builder &lbld_before, const
> fs_builder &lbld_after,
> assert(lbld_before.group() == lbld_after.group());
>  
> /* Specified channel group from the destination region. */
> -   const fs_reg dst = horiz_offset(inst->dst, lbld_after.group());
> +   const fs_reg dst = horiz_offset(inst->dst, lbld_after.group() -
> inst->group);
> const unsigned dst_size = inst->size_written /
>inst->dst.component_size(inst->exec_size);
>  

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NIR constant problem for GPU which doesn't have native integer support

2019-01-07 Thread Erik Faye-Lund
On Fri, 2019-01-04 at 09:40 -0600, Jason Ekstrand wrote:
> On Fri, Jan 4, 2019 at 4:07 AM Erik Faye-Lund <
> erik.faye-l...@collabora.com> wrote:
> > On Thu, 2019-01-03 at 11:58 -0600, Jason Ekstrand wrote:
> > > On Thu, Jan 3, 2019 at 3:39 AM Erik Faye-Lund <
> > > erik.faye-l...@collabora.com> wrote:
> > > > On Wed, 2019-01-02 at 10:16 -0600, Jason Ekstrand wrote:
> > > > > On Wed, Jan 2, 2019 at 9:43 AM Ilia Mirkin <
> > imir...@alum.mit.edu>
> > > > > wrote:
> > > > > > Have a look at the first 4 patches in the series from
> > Jonathan
> > > > > > Marek
> > > > > > to address some of these issues:
> > > > > > 
> > > > > > https://patchwork.freedesktop.org/series/54295/
> > > > > > 
> > > > > > Not sure exactly what state that work is in, but I've added
> > > > > > Jonathan
> > > > > > to CC, perhaps he can provide an update.
> > > > > > 
> > > > > > Cheers,
> > > > > > 
> > > > > >   -ilia
> > > > > > 
> > > > > > On Wed, Jan 2, 2019 at 6:28 AM Qiang Yu 
> > > > wrote:
> > > > > > >
> > > > > > > Hi guys,
> > > > > > >
> > > > > > > I found the problem with this test fragment shader when
> > lima
> > > > > > development:
> > > > > > > uniform int color;
> > > > > > > void main() {
> > > > > > > if (color > 1)
> > > > > > > gl_FragColor = vec4(1.0, 0.0, 0.0, 1);
> > > > > > > else
> > > > > > > gl_FragColor = vec4(0.0, 1.0, 0.0, 1);
> > > > > > > }
> > > > > > >
> > > > > > > nir_print_shader output:
> > > > > > > impl main {
> > > > > > > block block_0:
> > > > > > > /* preds: */
> > > > > > > > vec1 32 ssa_0 = load_const (0x00000001 /*
> > 0.000000
> > > > */)
> > > > > > > > vec4 32 ssa_1 = load_const (0x3f800000 /*
> > 1.000000
> > > > */,
> > > > > > > > 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */,
> > > > 0x3f800000
> > > > > > > /*
> > > > > > > > 1.000000 */)
> > > > > > > > vec4 32 ssa_2 = load_const (0x00000000 /*
> > 0.000000
> > > > */,
> > > > > > > > 0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */,
> > > > 0x3f800000
> > > > > > > /*
> > > > > > > > 1.000000 */)
> > > > > > > > vec1 32 ssa_3 = load_const (0x00000000 /*
> > 0.000000
> > > > */)
> > > > > > > vec1 32 ssa_4 = intrinsic load_uniform (ssa_3)
> > (0, 1,
> > > > 0)
> > > > > > /*
> > > > > > > base=0 */ /* range=1 */ /* component=0 */   /* color */
> > > > > > > vec1 32 ssa_5 = slt ssa_0, ssa_4
> > > > > > > vec1 32 ssa_6 = fnot ssa_5
> > > > > > > > vec4 32 ssa_7 = bcsel ssa_6.xxxx, ssa_2, ssa_1
> > > > > > > intrinsic store_output (ssa_7, ssa_3) (0, 15, 0)
> > /*
> > > > > > base=0 */
> > > > > > > /* wrmask=xyzw */ /* component=0 */   /* gl_FragColor
> > */
> > > > > > > /* succs: block_1 */
> > > > > > > block block_1:
> > > > > > > }
> > > > > > >
> > > > > > > ssa0 is not converted to float when glsl to nir. I see
> > > > > > glsl_to_nir.cpp
> > > > > > > will create flt/ilt/ult
> > > > > > > based on source type for gpu support native integer, but
> > for
> > > > gpu
> > > > > > not
> > > > > > > support native
> > > > > > > integer, just create slt for all source type. And in
> > > > > > > nir_lower_constant_initializers,
> > > > > > > there's also no type conversion for integer constant.
> > > > > 
> > > > > This is a generally sticky issue.  In NIR, we have no concept
> > of
> > > > > types on SSA values which has proven perfectly reasonable and
> > > > > actually very powerful in a world where integers are
> > supported
> > > > > natively.  Unfortunately, it causes significant problems for
> > > > float-
> > > > > only architectures.
> > > > 
> > > > I would like to take this chance to say that this untyped SSA-
> > value
> > > > choice has led to issues in both radeon_si (because LLVM
> > values
> > > > are
> > > > typed) and zink (similarly, because SPIR-V values are typed),
> > where
> > > > we
> > > > need to do bitcasts on every access because there's just not
> > enough
> > > > information available to emit variables with the right type.
> > > 
> > > I'm not sure if I agree that the two problems are the same or
> > not... 
> > > More on that in a bit.
> > >  
> > > > It took us a lot of time to realize that the meta-data from the
> > > > opcodes
> > > > doesn't *really* provide this, because the rest of nir doesn't
> > > > treat
> > > > values consistently. In fact, this feels arguably more like
> > buggy
> > > > behavior; why do we even have fmov when all of the time the
> > > > compiler
> > > > will emit imovs for floating-point values...? Or why do we have
> > > > bitcast
> > > 
> > > Why do we have different mov opcodes?  Because they have
> > different
> > > behavior in the presence of source/destination modifiers.
> > 
> > Is this general NIR-behavior (i.e will this be honored by constant
> > folding etc), or is it Intel specific? If it's NIR-behavior, is it
> > documented somewhere?
> 
> No, constant folding doesn't do modifiers.  I had completely
> forgotten about this fact until 

Re: [Mesa-dev] [PATCH] anv/android: handle storage images in vkGetSwapchainGrallocUsageANDROID

2019-01-07 Thread Tapani Pälli



On 1/7/19 11:56 AM, Bas Nieuwenhuizen wrote:

On Wed, Dec 5, 2018 at 1:05 PM Tapani Pälli  wrote:




On 12/5/18 2:00 PM, Bas Nieuwenhuizen wrote:

On Wed, Dec 5, 2018 at 12:51 PM Tapani Pälli  wrote:




On 12/5/18 1:44 PM, Bas Nieuwenhuizen wrote:

On Wed, Dec 5, 2018 at 12:37 PM Tapani Pälli  wrote:




On 12/5/18 1:22 PM, Bas Nieuwenhuizen wrote:

On Wed, Dec 5, 2018 at 12:15 PM Tapani Pälli  wrote:




On 12/5/18 1:01 PM, Bas Nieuwenhuizen wrote:

On Fri, Sep 7, 2018 at 12:54 AM Kevin Strasser  wrote:


Android P and earlier expect that the surface supports storage images, and
so many of the tests fail when the framework checks for that support. The
framework also includes various image format and usage combinations that are
invalid for the hardware.

Drop the STORAGE restriction from the HAL and whitelist a pair of
formats so that existing versions of Android can pass these tests.

Fixes:
dEQP-VK.wsi.android.*

Signed-off-by: Kevin Strasser 
---
  src/intel/vulkan/anv_android.c | 23 ++-
  1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c
index 46c41d5..e2640b8 100644
--- a/src/intel/vulkan/anv_android.c
+++ b/src/intel/vulkan/anv_android.c
@@ -234,7 +234,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
 *grallocUsage = 0;
 intel_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);

-   /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
+   /* WARNING: Android's libvulkan.so hardcodes the VkImageUsageFlags
  * returned to applications via 
VkSurfaceCapabilitiesKHR::supportedUsageFlags.
  * The relevant code in libvulkan/swapchain.cpp contains this fun 
comment:
  *
@@ -247,7 +247,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
  * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
  */

-   const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
+   VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {


Why remove the const here?


.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
.format = format,
.type = VK_IMAGE_TYPE_2D,
@@ -255,6 +255,17 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
.usage = imageUsage,
 };

+   /* Android P and earlier doesn't check if the physical device supports a
+* given format and usage combination before calling this function. Omit the
+* storage requirement to make the tests pass.
+*/
+#if ANDROID_API_LEVEL <= 28
+   if (format == VK_FORMAT_R8G8B8A8_SRGB ||
+   format == VK_FORMAT_R5G6B5_UNORM_PACK16) {
+  image_format_info.usage &= ~VK_IMAGE_USAGE_STORAGE_BIT;
+   }
+#endif


I don't think you need this. Per the Vulkan spec you can only use a
format + usage combination for a swapchain if it is supported per
ImageFormatProperties, using essentially the same check happening
above. I know CTS has been bad at this, but Vulkan CTS should have
been fixed for a bit now. (I don't think all the fixes are in Android
CTS 9.0_r4 yet, maybe the next release?)


AFAIK the problem here is not about CTS. It's the swapchain
implementation that always requires storage support.


Actually swapchain creation has the following valid usage rule:

"The implied image creation parameters of the swapchain must be
supported as reported by vkGetPhysicalDeviceImageFormatProperties"

So since those formats don't support the STORAGE usage bit, that test
fails and you are not allowed to create a swapchain with those formats
and storage, even if the surface capabilities expose the STORAGE
usage bit in general.


Right ... this was done because the comment in the swapchain code setting
the bits makes it seem like it was not thought through:

// TODO(jessehall): I think these are right, but haven't thought hard about
// it. Do we need to query the driver for support of any of these?


That was from before the spec was changed to add that rule.


OK, if I understand correctly, we should rather try to fix those
tests to skip instead of fail?


They should be fixed with:
https://github.com/KhronosGroup/VK-GL-CTS/commit/49eab80e4a8b3af1790b9ac88b096aa9bffd193f#diff-8369d6640a2c6ad0c0fc1d85b113faeb
https://github.com/KhronosGroup/VK-GL-CTS/commit/858f5396a4f63223fcf31f717d23b4b552e10182#diff-8369d6640a2c6ad0c0fc1d85b113faeb


Thanks, will try with these!


Hi,

Did you have any luck with this? This patch (or mine) are still
pending review based on this?


Sorry, I had forgotten about this but will get to it now. Could you please 
pinpoint which patch of yours was referred to here?




Thanks,
Bas











(Also silently removing the usage bit is bad, because the app could
try actually using image stores with the image ...)


True, it is not nice ..



+
 VkImageFormatProperties2KHR image_format_props = {
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR,
 };
@@ 

Re: [Mesa-dev] [PATCH] radv: Implement buffer stores with less than 4 components.

2019-01-07 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 

On 12/24/18 3:43 PM, Bas Nieuwenhuizen wrote:

We started using it in the btoi paths for r32g32b32, and the LLVM IR
checker will complain about it because we end up with intrinsics with
the wrong type extension in the name.

Fixes: 593996bc02 ("radv: implement buffer to image operations for R32G32B32")
---
  src/amd/common/ac_nir_to_llvm.c | 19 ++-
  1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6d97212b805..0fa38b83a57 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2392,24 +2392,33 @@ static void visit_image_store(struct ac_nir_context 
*ctx,
glc = ctx->ac.i1true;
  
  	if (dim == GLSL_SAMPLER_DIM_BUF) {

+   char name[48];
+   const char *types[] = { "f32", "v2f32", "v4f32" };
LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, 
true);
+   LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, 
instr->src[3]));
+   unsigned src_channels = ac_get_llvm_num_components(src);
  
-		params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); /* data */

+   if (src_channels == 3)
+   src = ac_build_expand(&ctx->ac, src, 3, 4);
+
+   params[0] = src; /* data */
params[1] = rsrc;
params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, 
instr->src[1]),
ctx->ac.i32_0, ""); /* 
vindex */
params[3] = ctx->ac.i32_0; /* voffset */
+   snprintf(name, sizeof(name), "%s.%s",
+HAVE_LLVM >= 0x800 ? 
"llvm.amdgcn.struct.buffer.store.format"
+   : "llvm.amdgcn.buffer.store.format",
+types[CLAMP(src_channels, 1, 3) - 1]);
+
if (HAVE_LLVM >= 0x800) {
params[4] = ctx->ac.i32_0; /* soffset */
params[5] = glc ? ctx->ac.i32_1 : ctx->ac.i32_0;
-   ac_build_intrinsic(&ctx->ac, 
"llvm.amdgcn.struct.buffer.store.format.v4f32", ctx->ac.voidt,
-  params, 6, 0);
} else {
params[4] = glc;  /* glc */
params[5] = ctx->ac.i1false;  /* slc */
-   ac_build_intrinsic(&ctx->ac, 
"llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt,
-  params, 6, 0);
}
+   ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6, 0);
} else {
struct ac_image_args args = {};
args.opcode = ac_image_store;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: skip draws with instance_count == 0

2019-01-07 Thread Samuel Pitoiset
Loosely based on RadeonSI.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 1966098e08c..8e3f1a7f5f0 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3697,6 +3697,20 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
radeon_check_space(cmd_buffer->device->ws,
   cmd_buffer->cs, 4096);
 
+   if (likely(!info->indirect)) {
+   /* SI-CI treat instance_count==0 as instance_count==1. There is
+* no workaround for indirect draws, but we can at least skip
+* direct draws.
+*/
+   if (unlikely(!info->instance_count))
+   return;
+
+   /* Handle count == 0. */
+   if (unlikely(!info->count &&
+(info->indexed || !info->strmout_buffer)))
+   return;
+   }
+
/* Use optimal packet order based on whether we need to sync the
 * pipeline.
 */
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/10] intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass.

2019-01-07 Thread Francisco Jerez
Iago Toral  writes:

> On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
>> Currently the visitor attempts to enforce the regioning restrictions
>> that apply to double-precision instructions on CHV/BXT at NIR-to-i965
>> translation time.  It is possible though for the copy propagation
>> pass
>> to violate this restriction if a strided move is propagated into one
>> of the affected instructions.  I've only reproduced this issue on a
>> future platform but it could affect CHV/BXT too under the right
>> conditions.
>> 
>> Cc: mesa-sta...@lists.freedesktop.org
>> ---
>>  .../compiler/brw_fs_copy_propagation.cpp  | 10 +++
>>  src/intel/compiler/brw_ir_fs.h| 28
>> +++
>>  2 files changed, 38 insertions(+)
>> 
>> diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp
>> b/src/intel/compiler/brw_fs_copy_propagation.cpp
>> index a8ec1c34630..c23ce1ef426 100644
>> --- a/src/intel/compiler/brw_fs_copy_propagation.cpp
>> +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
>> @@ -315,6 +315,16 @@ can_take_stride(fs_inst *inst, unsigned arg,
>> unsigned stride,
>> if (stride > 4)
>>return false;
>>  
>> +   /* Bail if the channels of the source need to be aligned to the
>> byte offset
>> +* of the corresponding channel of the destination, and the
>> provided stride
>> +* would break this restriction.
>> +*/
>> +   if (has_dst_aligned_region_restriction(devinfo, inst) &&
>> +   !(type_sz(inst->src[arg].type) * stride ==
>> +   type_sz(inst->dst.type) * inst->dst.stride ||
>> + stride == 0))
>> +  return false;
>> +
>> /* 3-source instructions can only be Align16, which restricts
>> what strides
>>  * they can take. They can only take a stride of 1 (the usual
>> case), or 0
>>  * with a special "repctrl" bit. But the repctrl bit doesn't work
>> for
>> diff --git a/src/intel/compiler/brw_ir_fs.h
>> b/src/intel/compiler/brw_ir_fs.h
>> index 07e7224e0f8..95b069a2e02 100644
>> --- a/src/intel/compiler/brw_ir_fs.h
>> +++ b/src/intel/compiler/brw_ir_fs.h
>> @@ -486,4 +486,32 @@ get_exec_type_size(const fs_inst *inst)
>> return type_sz(get_exec_type(inst));
>>  }
>>  
>> +/**
>> + * Return whether the following regioning restriction applies to the
>> specified
>> + * instruction.  From the Cherryview PRM Vol 7. "Register Region
>> + * Restrictions":
>> + *
>> + * "When source or destination datatype is 64b or operation is
>> integer DWord
>> + *  multiply, regioning in Align1 must follow these rules:
>> + *
>> + *  1. Source and Destination horizontal stride must be aligned to
>> the same qword.
>> + *  2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
>> + *  3. Source and Destination offset must be the same, except the
>> case of
>> + * scalar source."
>> + */
>> +static inline bool
>> +has_dst_aligned_region_restriction(const gen_device_info *devinfo,
>> +   const fs_inst *inst)
>> +{
>> +   const brw_reg_type exec_type = get_exec_type(inst);
>> +   const bool is_int_multiply =
>> !brw_reg_type_is_floating_point(exec_type) &&
>> + (inst->opcode == BRW_OPCODE_MUL || inst->opcode ==
>> BRW_OPCODE_MAD);
>
> Should this be extended to include MAC and MACH too?
>

The documentation is unclear, but it doesn't look like that's the case
according to the simulator, because those instructions don't do more
than a 16x16 or 32x16 bit integer multiply respectively.

>> +
>> +   if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 ||
>> +   (type_sz(exec_type) == 4 && is_int_multiply))
>> +  return devinfo->is_cherryview ||
>> gen_device_info_is_9lp(devinfo);
>
> How about:
>
> if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
>...
> } else {
>return false;
> }
>
> since we only really need to do these checks in those platforms it
> might make a bit more sense to do it this way.
>

Right now the difference is purely cosmetic, but in the future that
won't work for the platform this was designed for, I can send you more
details off-list.

>> +   else
>> +  return false;
>> +}
>> +
>>  #endif


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] clover/meson: Ignore 'svn' suffix when computing CLANG_RESOURCE_DIR

2019-01-07 Thread Pierre Moreau
On 2019-01-07 — 11:34, Dylan Baker wrote:
[snip]
> I think you can just use dep_llvm.version() now, which does split the 'svn' or
> 'git' off the end already. When this was written it didn't do that.

Thanks, it does indeed work and feels way less hacky than my split; I’ll send a
v2 shortly.

Pierre


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

--- Comment #14 from Brandon Wright  ---
(In reply to oschowa from comment #13)
> With this patch applied it works correctly again, thanks!

Also confirmed here.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 02/10] intel/fs: Implement quad swizzles on ICL+.

2019-01-07 Thread Francisco Jerez
Iago Toral  writes:

> On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
>> Align16 is no longer a thing, so a new implementation is provided
>> using Align1 instead.  Not all possible swizzles can be represented
>> as
>> a single Align1 region, but some fast paths are provided for
>> frequently used swizzles that can be represented efficiently in
>> Align1
>> mode.
>> 
>> Fixes ~90 subgroup quad swap Vulkan CTS tests.
>> 
>> Cc: mesa-sta...@lists.freedesktop.org
>> ---
>>  src/intel/compiler/brw_fs.cpp   | 25 +++-
>>  src/intel/compiler/brw_fs.h |  4 ++
>>  src/intel/compiler/brw_fs_generator.cpp | 82 ---
>> --
>>  3 files changed, 93 insertions(+), 18 deletions(-)
>> 
>> diff --git a/src/intel/compiler/brw_fs.cpp
>> b/src/intel/compiler/brw_fs.cpp
>> index 2f0f0151219..97544fdf465 100644
>> --- a/src/intel/compiler/brw_fs.cpp
>> +++ b/src/intel/compiler/brw_fs.cpp
>> @@ -315,6 +315,20 @@ fs_inst::has_source_and_destination_hazard()
>> const
>> * may stomp all over it.
>> */
>>return true;
>> +   case SHADER_OPCODE_QUAD_SWIZZLE:
>> +  switch (src[1].ud) {
>
> Maybe it is worth adding a small comment here indicating that these are
> the cases where we implement the opcode as a single instruction and
> refer to the generator for details?
>

Yeah, fixed up locally.

>> +  case BRW_SWIZZLE_XXXX:
>> +  case BRW_SWIZZLE_YYYY:
>> +  case BRW_SWIZZLE_ZZZZ:
>> +  case BRW_SWIZZLE_WWWW:
>> +  case BRW_SWIZZLE_XXZZ:
>> +  case BRW_SWIZZLE_YYWW:
>> +  case BRW_SWIZZLE_XYXY:
>> +  case BRW_SWIZZLE_ZWZW:
>> + return false;
>> +  default:
>> + return !is_uniform(src[0]);
>
> Shouldn't this be:
>
> return !is_uniform(src[0]) ||
>(devinfo->gen < 11 && type_sz(src.type) == 4);
>
> Since in that case we also implement the opcode with a single ALIGN16
> instruction.
>

Not really.  Maybe you mean "!is_uniform(src[0]) &&
(devinfo->gen >= 11 || type_sz(src.type) != 4)" instead?  That would be
somewhat more accurate than the expression in my patch, but
unfortunately the devinfo pointer is not available here.  I wouldn't
mind plumbing it through but this patch is meant for mesa-stable, and it
shouldn't affect correctness to be more strict than necessary regarding
source/destination hazards.

>> +  }
>> default:
>>/* The SIMD16 compressed instruction
>> *
>> @@ -5579,9 +5593,14 @@ get_lowered_simd_width(const struct
>> gen_device_info *devinfo,
>> case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
>>return MIN2(8, inst->exec_size);
>>  
>> -   case SHADER_OPCODE_QUAD_SWIZZLE:
>> -  return 8;
>> -
>> +   case SHADER_OPCODE_QUAD_SWIZZLE: {
>> +  const unsigned swiz = inst->src[1].ud;
>> +  return (is_uniform(inst->src[0]) ?
>> + get_fpu_lowered_simd_width(devinfo, inst) :
>> +  devinfo->gen < 11 && type_sz(inst->src[0].type) == 4 ?
>> 8 :
>> +  swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ?
>> 4 :
>> +  get_fpu_lowered_simd_width(devinfo, inst));
>> +   }
>> case SHADER_OPCODE_MOV_INDIRECT: {
>>/* From IVB and HSW PRMs:
>> *
>> diff --git a/src/intel/compiler/brw_fs.h
>> b/src/intel/compiler/brw_fs.h
>> index 53d9b6ce7bf..dc36ecc21ac 100644
>> --- a/src/intel/compiler/brw_fs.h
>> +++ b/src/intel/compiler/brw_fs.h
>> @@ -480,6 +480,10 @@ private:
>>   struct brw_reg src,
>>   struct brw_reg idx);
>>  
>> +   void generate_quad_swizzle(const fs_inst *inst,
>> +  struct brw_reg dst, struct brw_reg
>> src,
>> +  unsigned swiz);
>> +
>> bool patch_discard_jumps_to_fb_writes();
>>  
>> const struct brw_compiler *compiler;
>> diff --git a/src/intel/compiler/brw_fs_generator.cpp
>> b/src/intel/compiler/brw_fs_generator.cpp
>> index 08dd83dded7..84627e83132 100644
>> --- a/src/intel/compiler/brw_fs_generator.cpp
>> +++ b/src/intel/compiler/brw_fs_generator.cpp
>> @@ -582,6 +582,72 @@ fs_generator::generate_shuffle(fs_inst *inst,
>> }
>>  }
>>  
>> +void
>> +fs_generator::generate_quad_swizzle(const fs_inst *inst,
>> +struct brw_reg dst, struct
>> brw_reg src,
>> +unsigned swiz)
>> +{
>> +   /* Requires a quad. */
>> +   assert(inst->exec_size >= 4);
>> +
>> +   if (src.file == BRW_IMMEDIATE_VALUE ||
>> +   has_scalar_region(src)) {
>> +  /* The value is uniform across all channels */
>> +  brw_MOV(p, dst, src);
>> +
>> +   } else if (devinfo->gen < 11 && type_sz(src.type) == 4) {
>> +  /* This only works on 8-wide 32-bit values */
>> +  assert(inst->exec_size == 8);
>> +  assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
>> +  assert(src.vstride == src.width + 1);
>> +  brw_set_default_access_mode(p, BRW_ALIGN_16);
>> +  struct brw_reg swiz_src = stride(src, 4, 

Re: [Mesa-dev] [PATCH 09/12] nir: add legal bit_sizes to intrinsics

2019-01-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 12:27 PM Karol Herbst  wrote:

> With OpenCL some system values match the address bits, but in GLSL we also
> have some system values being 64 bit like subgroup masks.
>
> With this it is possible to adjust the builder functions so that depending
> on the bit_sizes the correct bit_size is used or an additional argument is
> added in case of multiple possible values.
>
> v2: validate dest bit_size
>
> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/nir/nir.h   |  3 +++
>  src/compiler/nir/nir_intrinsics.py   | 25 +++--
>  src/compiler/nir/nir_intrinsics_c.py |  6 +-
>  src/compiler/nir/nir_validate.c  |  6 ++
>  4 files changed, 29 insertions(+), 11 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index e9f8f15d387..c5ea8dcdd1e 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1297,6 +1297,9 @@ typedef struct {
>
> /** semantic flags for calls to this intrinsic */
> nir_intrinsic_semantic_flag flags;
> +
> +   /** bitfield of legal bit sizes */
> +   unsigned bit_sizes : 7;
>

This should be called dest_bit_sizes and be after dest_components.  Also
the bitfield :7 is really pointless given how many other things we have in
this struct that are simply declared "unsigned".  If we're going to make it
a bitfield (probably a good idea anyway), we should do so across the board.


>  } nir_intrinsic_info;
>
>  extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
> diff --git a/src/compiler/nir/nir_intrinsics.py
> b/src/compiler/nir/nir_intrinsics.py
> index 6ea6ad1198f..830c406b450 100644
> --- a/src/compiler/nir/nir_intrinsics.py
> +++ b/src/compiler/nir/nir_intrinsics.py
> @@ -32,7 +32,7 @@ class Intrinsic(object):
> NOTE: this must be kept in sync with nir_intrinsic_info.
> """
> def __init__(self, name, src_components, dest_components,
> -indices, flags, sysval):
> +indices, flags, sysval, bit_sizes):
> """Parameters:
>
> - name: the intrinsic name
> @@ -45,6 +45,7 @@ class Intrinsic(object):
> - indices: list of constant indicies
> - flags: list of semantic flags
> - sysval: is this a system-value intrinsic
> +   - bit_sizes: allowed dest bit_sizes
> """
> assert isinstance(name, str)
> assert isinstance(src_components, list)
> @@ -58,6 +59,8 @@ class Intrinsic(object):
> if flags:
> assert isinstance(flags[0], str)
> assert isinstance(sysval, bool)
> +   if bit_sizes:
> +   assert isinstance(bit_sizes[0], int)
>
> self.name = name
> self.num_srcs = len(src_components)
> @@ -68,6 +71,7 @@ class Intrinsic(object):
> self.indices = indices
> self.flags = flags
> self.sysval = sysval
> +   self.bit_sizes = bit_sizes
>
>  #
>  # Possible indices:
> @@ -123,10 +127,10 @@ CAN_REORDER   = "NIR_INTRINSIC_CAN_REORDER"
>  INTR_OPCODES = {}
>
>  def intrinsic(name, src_comp=[], dest_comp=-1, indices=[],
> -  flags=[], sysval=False):
> +  flags=[], sysval=False, bit_sizes=[]):
>  assert name not in INTR_OPCODES
>  INTR_OPCODES[name] = Intrinsic(name, src_comp, dest_comp,
> -   indices, flags, sysval)
> +   indices, flags, sysval, bit_sizes)
>
>  intrinsic("nop", flags=[CAN_ELIMINATE])
>
> @@ -448,9 +452,10 @@ intrinsic("shared_atomic_fmin",  src_comp=[1, 1],
> dest_comp=1, indices=[BASE])
>  intrinsic("shared_atomic_fmax",  src_comp=[1, 1], dest_comp=1,
> indices=[BASE])
>  intrinsic("shared_atomic_fcomp_swap", src_comp=[1, 1, 1], dest_comp=1,
> indices=[BASE])
>
> -def system_value(name, dest_comp, indices=[]):
> +def system_value(name, dest_comp, indices=[], bit_sizes=[32]):
>  intrinsic("load_" + name, [], dest_comp, indices,
> -  flags=[CAN_ELIMINATE, CAN_REORDER], sysval=True)
> +  flags=[CAN_ELIMINATE, CAN_REORDER], sysval=True,
> +  bit_sizes=bit_sizes)
>
>  system_value("frag_coord", 4)
>  system_value("front_face", 1)
> @@ -485,11 +490,11 @@ system_value("layer_id", 1)
>  system_value("view_index", 1)
>  system_value("subgroup_size", 1)
>  system_value("subgroup_invocation", 1)
> -system_value("subgroup_eq_mask", 0)
> -system_value("subgroup_ge_mask", 0)
> -system_value("subgroup_gt_mask", 0)
> -system_value("subgroup_le_mask", 0)
> -system_value("subgroup_lt_mask", 0)
> +system_value("subgroup_eq_mask", 0, bit_sizes=[32, 64])
> +system_value("subgroup_ge_mask", 0, bit_sizes=[32, 64])
> +system_value("subgroup_gt_mask", 0, bit_sizes=[32, 64])
> +system_value("subgroup_le_mask", 0, bit_sizes=[32, 64])
> +system_value("subgroup_lt_mask", 0, bit_sizes=[32, 64])
>  system_value("num_subgroups", 1)
>  system_value("subgroup_id", 1)
>  system_value("local_group_size", 3)
> diff --git a/src/compiler/nir/nir_intrinsics_c.py
> 

[Mesa-dev] [PATCH] radeonsi: Fix use of 1- or 2- component GL_DOUBLE vbo's.

2019-01-07 Thread Mario Kleiner
With Mesa 18.1, commit be973ed21f6e, si_llvm_load_input_vs()
changed the number of source 32-bit wide dword components
used for fetching vertex attributes into the vertex shader
from a constant 4 to a variable num_channels number, depending
on input data format, with some special case handling for
input data formats like 64-Bit doubles.

In the case of a GL_DOUBLE input data format with one
or two components though, e.g, submitted via ...

a) glTexCoordPointer(1, GL_DOUBLE, 0, buffer);
b) glTexCoordPointer(2, GL_DOUBLE, 0, buffer);

... the input format would be SI_FIX_FETCH_RG_64_FLOAT,
but no special case handling was implemented for that
case, so in the default path the number of 32-bit
dwords would be set to the number of float input components
derived from info->input_usage_mask. This ends with corrupted
input to the vertex shader, because fetching a 64-bit double
from the vbo requires fetching two 32-bit dwords instead of 1,
and fetching a two double input requires 4 dword fetches
instead of 2, so in these cases the vertex shader receives
incomplete/truncated input data:

a) float v = gl_MultiTexCoord0.x;  -> v.x is corrupted.
b) vec2  v = gl_MultiTexCoord0.xy; -> v.x is assigned
   correctly, but v.y is corrupted.

This happens with the standard TGSI IR compiled shaders.
Under NIR with R600_DEBUG=nir, we got correct behavior
because the current radeonsi nir code always assigns
info->input_usage_mask = TGSI_WRITEMASK_XYZW, thereby
always fetches 4 dwords regardless of what the shader
actually needs.

Fix this by properly assigning 2 or 4 dword fetches for
one or two component GL_DOUBLE input.

Fixes: be973ed21f6e ("radeonsi: load the right number of
   components for VS inputs and TBOs")

Signed-off-by: Mario Kleiner 
Cc: mesa-sta...@lists.freedesktop.org
Cc: Marek Olšák 
---
 src/gallium/drivers/radeonsi/si_shader.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 190edce..14bb875 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -561,6 +561,14 @@ void si_llvm_load_input_vs(
 
/* Do multiple loads for special formats. */
switch (fix_fetch) {
+   case SI_FIX_FETCH_RG_64_FLOAT:
+   num_fetches = 1; /* 1 2-dword or 4-dword load */
+   fetch_stride = 0;
+   if (util_last_bit(info->input_usage_mask[input_index]) >= 2)
+   num_channels = 4; /* 2 doubles in 4 dwords */
+   else
+   num_channels = 2; /* 1 double in 2 dwords */
+   break;
case SI_FIX_FETCH_RGB_64_FLOAT:
num_fetches = 3; /* 3 2-dword loads */
fetch_stride = 8;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radv: add support for VK_EXT_memory_budget

2019-01-07 Thread Bas Nieuwenhuizen
On Mon, Jan 7, 2019 at 6:20 PM Samuel Pitoiset
 wrote:
>
>
> On 1/7/19 6:06 PM, Alex Smith wrote:
>
> Hi Samuel,
>
> Thanks for implementing this - I've been wanting this extension for a while 
> so it's good it's finally available.
>
> This is just reporting the total heap sizes as the budget, which is the same 
> info we already get from the basic heap properties. The way I'd expected 
> budget to work (and what the spec is saying as far as I can see) is that it's 
> an estimate of how much is available for the calling app to use in that heap 
> at the time of the call, so should account for current system-wide usage of 
> the heap by other apps. Shouldn't this be something like (heap size - system 
> wide usage of the heap + current app usage of the heap)? (+ app usage since 
> the spec says budget includes currently allocated device memory)
>
> Hi Alex,
>
> Yes, I was also wondering about that. We can add per-process counters for 
> VRAM and GTT heaps, but I don't see how we can be accurate for the visible 
> VRAM heap.

Actually, looking at the current counters we query, those are for
global usage, not the current process / drm fd usage, so we can
calculate the budget that way. What we still need is a way to get the
current usage more accurately than by counting allocations (which does
not reflect migrated data).
>
> As said in the commit description, that implementation is really inaccurate. 
> Though if you need something better I can improve.
>
> Note that I agree with you about the spec.
>
>
> Alex
>
> On Mon, 7 Jan 2019 at 16:35, Samuel Pitoiset  
> wrote:
>>
>> A simple Vulkan extension that allows apps to query size and
>> usage of all exposed memory heaps.
>>
>> The different usage values are not really accurate because
>> they are per drm-fd, but they should be close enough.
>>
>> Signed-off-by: Samuel Pitoiset 
>> ---
>>  src/amd/vulkan/radv_device.c  | 44 +++
>>  src/amd/vulkan/radv_extensions.py |  1 +
>>  2 files changed, 45 insertions(+)
>>
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index cef3a430555..32eaeb3b226 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -1352,12 +1352,56 @@ void radv_GetPhysicalDeviceMemoryProperties(
>> *pMemoryProperties = physical_device->memory_properties;
>>  }
>>
>> +static void
>> +radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
>> + VkPhysicalDeviceMemoryBudgetPropertiesEXT 
>> *memoryBudget)
>> +{
>> +   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
>> +   VkPhysicalDeviceMemoryProperties *memory_properties = 
>> &device->memory_properties;
>> +   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
>> +   uint64_t vram_size = radv_get_vram_size(device);
>> +   uint64_t gtt_size = device->rad_info.gart_size;
>> +
>> +   if (vram_size) {
>> +   memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = vram_size;
>> +   memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] =
>> +   device->ws->query_value(device->ws, 
>> RADEON_VRAM_USAGE);
>> +   }
>> +
>> +   if (visible_vram_size) {
>> +   memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = 
>> visible_vram_size;
>> +   memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] =
>> +   device->ws->query_value(device->ws, 
>> RADEON_VRAM_VIS_USAGE);
>> +   }
>> +
>> +   if (gtt_size) {
>> +   memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = gtt_size;
>> +   memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] =
>> +   device->ws->query_value(device->ws, 
>> RADEON_GTT_USAGE);
>> +   }
>> +
>> +   /* The heapBudget and heapUsage values must be zero for array 
>> elements
>> +* greater than or equal to
>> +* VkPhysicalDeviceMemoryProperties::memoryHeapCount.
>> +*/
>> +   for (uint32_t i = memory_properties->memoryHeapCount; i < 
>> VK_MAX_MEMORY_HEAPS; i++) {
>> +   memoryBudget->heapBudget[i] = 0;
>> +   memoryBudget->heapUsage[i] = 0;
>> +   }
>> +}
>> +
>>  void radv_GetPhysicalDeviceMemoryProperties2(
>> VkPhysicalDevicephysicalDevice,
>> VkPhysicalDeviceMemoryProperties2KHR   *pMemoryProperties)
>>  {
>> radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
>>
>> &pMemoryProperties->memoryProperties);
>> +
>> +   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
>> +   vk_find_struct(pMemoryProperties->pNext,
>> +  PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
>> +   if (memory_budget)
>> +   radv_get_memory_budget_properties(physicalDevice, 
>> memory_budget);
>>  }
>>
>>  VkResult radv_GetMemoryHostPointerPropertiesEXT(
>> diff --git a/src/amd/vulkan/radv_extensions.py 
>> 

Re: [Mesa-dev] [PATCH 2/3] ac/nir_to_llvm: add ac_are_tessfactors_def_in_all_invocs()

2019-01-07 Thread Marek Olšák
On Mon, Dec 17, 2018 at 8:18 PM Timothy Arceri 
wrote:

> The following patch will use this with the radeonsi NIR backend
> but I've added it to ac so we can use it with RADV in future.
>
> This is a NIR implementation of the tgsi function
> tgsi_scan_tess_ctrl().
> ---
>  src/amd/common/ac_nir_to_llvm.c | 161 
>  src/amd/common/ac_nir_to_llvm.h |   2 +
>  2 files changed, 163 insertions(+)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index 4294956de1..055940b75f 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -4063,3 +4063,164 @@ ac_lower_indirect_derefs(struct nir_shader *nir,
> enum chip_class chip_class)
>
> nir_lower_indirect_derefs(nir, indirect_mask);
>  }
> +
> +static unsigned
> +get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
> +{
> +   if (intrin->intrinsic != nir_intrinsic_store_deref)
> +   return 0;
> +
> +   nir_variable *var =
> +
>  nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0]));
> +
> +   if (var->data.mode != nir_var_shader_out)
> +   return 0;
> +
> +   unsigned writemask = 0;
> +   const int location = var->data.location;
> +   unsigned first_component = var->data.location_frac;
> +   unsigned num_comps = intrin->dest.ssa.num_components;
> +
> +   if (location == VARYING_SLOT_TESS_LEVEL_INNER)
> +   writemask = ((1 << num_comps + 1) - 1) << first_component;
>

Parentheses are missing in "1 << num_comps + 1".


> +   else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
> +   writemask = (((1 << num_comps + 1) - 1) <<
> first_component) << 4;
>

Same here.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] radeonsi: Fix use of 1- or 2- component GL_DOUBLE vbo's.

2019-01-07 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Mon, Jan 7, 2019 at 1:42 PM Mario Kleiner 
wrote:

> With Mesa 18.1, commit be973ed21f6e, si_llvm_load_input_vs()
> changed the number of source 32-bit wide dword components
> used for fetching vertex attributes into the vertex shader
> from a constant 4 to a variable num_channels number, depending
> on input data format, with some special case handling for
> input data formats like 64-Bit doubles.
>
> In the case of a GL_DOUBLE input data format with one
> or two components though, e.g, submitted via ...
>
> a) glTexCoordPointer(1, GL_DOUBLE, 0, buffer);
> b) glTexCoordPointer(2, GL_DOUBLE, 0, buffer);
>
> ... the input format would be SI_FIX_FETCH_RG_64_FLOAT,
> but no special case handling was implemented for that
> case, so in the default path the number of 32-bit
> dwords would be set to the number of float input components
> derived from info->input_usage_mask. This ends with corrupted
> input to the vertex shader, because fetching a 64-bit double
> from the vbo requires fetching two 32-bit dwords instead of 1,
> and fetching a two double input requires 4 dword fetches
> instead of 2, so in these cases the vertex shader receives
> incomplete/truncated input data:
>
> a) float v = gl_MultiTexCoord0.x;  -> v.x is corrupted.
> b) vec2  v = gl_MultiTexCoord0.xy; -> v.x is assigned
>correctly, but v.y is corrupted.
>
> This happens with the standard TGSI IR compiled shaders.
> Under NIR with R600_DEBUG=nir, we got correct behavior
> because the current radeonsi nir code always assigns
> info->input_usage_mask = TGSI_WRITEMASK_XYZW, thereby
> always fetches 4 dwords regardless of what the shader
> actually needs.
>
> Fix this by properly assigning 2 or 4 dword fetches for
> one or two component GL_DOUBLE input.
>
> Fixes: be973ed21f6e ("radeonsi: load the right number of
>components for VS inputs and TBOs")
>
> Signed-off-by: Mario Kleiner 
> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Marek Olšák 
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 190edce..14bb875 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -561,6 +561,14 @@ void si_llvm_load_input_vs(
>
> /* Do multiple loads for special formats. */
> switch (fix_fetch) {
> +   case SI_FIX_FETCH_RG_64_FLOAT:
> +   num_fetches = 1; /* 1 2-dword or 4-dword load */
> +   fetch_stride = 0;
> +   if (util_last_bit(info->input_usage_mask[input_index]) >=
> 2)
> +   num_channels = 4; /* 2 doubles in 4 dwords */
> +   else
> +   num_channels = 2; /* 1 double in 2 dwords */
> +   break;
> case SI_FIX_FETCH_RGB_64_FLOAT:
> num_fetches = 3; /* 3 2-dword loads */
> fetch_stride = 8;
> --
> 2.7.4
>
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCHv2 07/10] intel/fs: Introduce regioning lowering pass.

2019-01-07 Thread Francisco Jerez
Iago Toral  writes:

> On Sat, 2019-01-05 at 14:03 -0800, Francisco Jerez wrote:
>> This legalization pass is meant to handle situations where the source
>> or destination regioning controls of an instruction are unsupported
>> by
>> the hardware and need to be lowered away into separate instructions.
>> This should be more reliable and future-proof than the current
>> approach of handling CHV/BXT restrictions manually all over the
>> visitor.  The same mechanism is leveraged to lower unsupported type
>> conversions easily, which obsoletes the lower_conversions pass.
>> 
>> v2: Give conditional modifiers the same treatment as predicates for
>> SEL instructions in lower_dst_modifiers() (Iago).  Special-case a
>> couple of other instructions with inconsistent conditional mod
>> semantics in lower_dst_modifiers() (Curro).
>> ---
>>  src/intel/Makefile.sources|   1 +
>>  src/intel/compiler/brw_fs.cpp |   5 +-
>>  src/intel/compiler/brw_fs.h   |  21 +-
>>  src/intel/compiler/brw_fs_lower_regioning.cpp | 399
>> ++
>>  src/intel/compiler/brw_ir_fs.h|  10 +
>>  src/intel/compiler/meson.build|   1 +
>>  6 files changed, 418 insertions(+), 19 deletions(-)
>>  create mode 100644 src/intel/compiler/brw_fs_lower_regioning.cpp
>> 
>> diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
>> index 5e7d32293b7..6b9874d2b80 100644
>> --- a/src/intel/Makefile.sources
>> +++ b/src/intel/Makefile.sources
>> @@ -64,6 +64,7 @@ COMPILER_FILES = \
>>  compiler/brw_fs_live_variables.h \
>>  compiler/brw_fs_lower_conversions.cpp \
>>  compiler/brw_fs_lower_pack.cpp \
>> +compiler/brw_fs_lower_regioning.cpp \
>>  compiler/brw_fs_nir.cpp \
>>  compiler/brw_fs_reg_allocate.cpp \
>>  compiler/brw_fs_register_coalesce.cpp \
>> diff --git a/src/intel/compiler/brw_fs.cpp
>> b/src/intel/compiler/brw_fs.cpp
>> index 889509badab..caa7a798332 100644
>> --- a/src/intel/compiler/brw_fs.cpp
>> +++ b/src/intel/compiler/brw_fs.cpp
>> @@ -6471,7 +6471,10 @@ fs_visitor::optimize()
>>OPT(dead_code_eliminate);
>> }
>>  
>> -   if (OPT(lower_conversions)) {
>> +   progress = false;
>> +   OPT(lower_conversions);
>> +   OPT(lower_regioning);
>> +   if (progress) {
>
> This is a small nitpick but since this makes lower_conversions
> redundant, maybe it makes more sense to just remove the call to it here
> already in this patch so you don't have to reset the progress variable
> and simply do:
>
> if (OPT(lower_regioning)) {
>...
> }
>

The main reason for this is that in the event of a regression this will
allow identifying from the bisection result whether the reason for the
failure is the lack of a condition in the lower_regioning pass which was
previously handled by lower_conversions, or whether it's a bug in the
lowering code of lower_regioning itself.

>>OPT(opt_copy_propagation);
>>OPT(dead_code_eliminate);
>>OPT(lower_simd_width);
>> diff --git a/src/intel/compiler/brw_fs.h
>> b/src/intel/compiler/brw_fs.h
>> index dc36ecc21ac..36825754931 100644
>> --- a/src/intel/compiler/brw_fs.h
>> +++ b/src/intel/compiler/brw_fs.h
>> @@ -164,6 +164,7 @@ public:
>> void lower_uniform_pull_constant_loads();
>> bool lower_load_payload();
>> bool lower_pack();
>> +   bool lower_regioning();
>> bool lower_conversions();
>> bool lower_logical_sends();
>> bool lower_integer_multiplication();
>> @@ -536,24 +537,8 @@ namespace brw {
>>}
>> }
>>  
>> -   /**
>> -* Remove any modifiers from the \p i-th source region of the
>> instruction,
>> -* including negate, abs and any implicit type conversion to the
>> execution
>> -* type.  Instead any source modifiers will be implemented as a
>> separate
>> -* MOV instruction prior to the original instruction.
>> -*/
>> -   inline bool
>> -   lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst
>> *inst, unsigned i)
>> -   {
>> -  assert(inst->components_read(i) == 1);
>> -  const fs_builder ibld(v, block, inst);
>> -  const fs_reg tmp = ibld.vgrf(get_exec_type(inst));
>> -
>> -  ibld.MOV(tmp, inst->src[i]);
>> -  inst->src[i] = tmp;
>> -
>> -  return true;
>> -   }
>> +   bool
>> +   lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst
>> *inst, unsigned i);
>>  }
>>  
>>  void shuffle_from_32bit_read(const brw::fs_builder ,
>> diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp
>> b/src/intel/compiler/brw_fs_lower_regioning.cpp
>> new file mode 100644
>> index 000..d7c97e1442a
>> --- /dev/null
>> +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp
>> @@ -0,0 +1,399 @@
>> +/*
>> + * Copyright © 2018 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person
>> obtaining a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without 

[Mesa-dev] [PATCH v2] clover/meson: Ignore 'svn' suffix when computing CLANG_RESOURCE_DIR

2019-01-07 Thread Pierre Moreau
The version exported by LLVM in its CMake configuration files can
include the “svn” suffix when building a development version (for
example “8.0.0svn”). However the exported clang headers are still found
under “lib/clang/8.0.0/”, without the “svn” suffix.
Meson takes care of removing the “svn” suffix from the version when
using the dependency’s `version()` method.

This processing is already performed in “configure.ac” when using
autotools.

Signed-off-by: Pierre Moreau 
---
v2:
* Replace the initial solution using a `split()`, by instead directly
  using Meson’s own `version()` function. (Dylan Baker)
* Update the commit message to mention the `version()` method.

 src/gallium/state_trackers/clover/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/meson.build 
b/src/gallium/state_trackers/clover/meson.build
index a6729af2fb8..62ac5f5278d 100644
--- a/src/gallium/state_trackers/clover/meson.build
+++ b/src/gallium/state_trackers/clover/meson.build
@@ -53,7 +53,7 @@ libclllvm = static_library(
 
'-DLIBCLC_LIBEXECDIR="@0@/"'.format(dep_clc.get_pkgconfig_variable('libexecdir')),
 '-DCLANG_RESOURCE_DIR="@0@"'.format(join_paths(
   dep_llvm.get_configtool_variable('libdir'), 'clang',
-  dep_llvm.get_configtool_variable('version'), 'include',
+  dep_llvm.version(), 'include',
 )),
   ],
   dependencies : [dep_llvm, dep_elf],
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] winsys/amdgpu: use the new BO list API

2019-01-07 Thread Marek Olšák
From: Marek Olšák 

---
I'll bump the libdrm version requirement after the libdrm patch lands.

 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 74 ---
 1 file changed, 25 insertions(+), 49 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 9e4de7779e2..72cf1e6c639 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1290,52 +1290,53 @@ static bool amdgpu_add_sparse_backing_buffers(struct 
amdgpu_cs_context *cs)
 
return true;
 }
 
 void amdgpu_cs_submit_ib(void *job, int thread_index)
 {
struct amdgpu_cs *acs = (struct amdgpu_cs*)job;
struct amdgpu_winsys *ws = acs->ctx->ws;
struct amdgpu_cs_context *cs = acs->cst;
int i, r;
-   amdgpu_bo_list_handle bo_list = NULL;
+   uint32_t bo_list = 0;
uint64_t seq_no = 0;
bool has_user_fence = amdgpu_cs_has_user_fence(cs);
bool use_bo_list_create = ws->info.drm_minor < 27;
struct drm_amdgpu_bo_list_in bo_list_in;
 
/* Prepare the buffer list. */
if (ws->debug_all_bos) {
   /* The buffer list contains all buffers. This is a slow path that
* ensures that no buffer is missing in the BO list.
*/
+  unsigned num_handles = 0;
+  struct drm_amdgpu_bo_list_entry *list =
+ alloca(ws->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
   struct amdgpu_winsys_bo *bo;
-  amdgpu_bo_handle *handles;
-  unsigned num = 0;
 
   simple_mtx_lock(>global_bo_list_lock);
-  handles = alloca(sizeof(handles[0]) * ws->num_buffers);
-
   LIST_FOR_EACH_ENTRY(bo, >global_bo_list, u.real.global_list_item) {
- assert(num < ws->num_buffers);
- handles[num++] = bo->bo;
+ if (bo->is_local)
+continue;
+
+ list[num_handles].bo_handle = bo->u.real.kms_handle;
+ list[num_handles].bo_priority = 0;
+ ++num_handles;
   }
 
-  r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
-handles, NULL, _list);
+  r = amdgpu_bo_list_create_raw(ws->dev, ws->num_buffers, list, _list);
   simple_mtx_unlock(>global_bo_list_lock);
   if (r) {
  fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
  goto cleanup;
   }
-   } else if (!use_bo_list_create) {
-  /* Standard path passing the buffer list via the CS ioctl. */
+   } else {
   if (!amdgpu_add_sparse_backing_buffers(cs)) {
  fprintf(stderr, "amdgpu: amdgpu_add_sparse_backing_buffers failed\n");
  r = -ENOMEM;
  goto cleanup;
   }
 
   struct drm_amdgpu_bo_list_entry *list =
  alloca(cs->num_real_buffers * sizeof(struct 
drm_amdgpu_bo_list_entry));
 
   unsigned num_handles = 0;
@@ -1345,59 +1346,34 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
  if (buffer->bo->is_local)
 continue;
 
  assert(buffer->u.real.priority_usage != 0);
 
  list[num_handles].bo_handle = buffer->bo->u.real.kms_handle;
  list[num_handles].bo_priority = 
(util_last_bit(buffer->u.real.priority_usage) - 1) / 2;
  ++num_handles;
   }
 
-  bo_list_in.operation = ~0;
-  bo_list_in.list_handle = ~0;
-  bo_list_in.bo_number = num_handles;
-  bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry);
-  bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)list;
-   } else {
-  /* Legacy path creating the buffer list handle and passing it to the CS 
ioctl. */
-  unsigned num_handles;
-
-  if (!amdgpu_add_sparse_backing_buffers(cs)) {
- fprintf(stderr, "amdgpu: amdgpu_add_sparse_backing_buffers failed\n");
- r = -ENOMEM;
- goto cleanup;
-  }
-
-  amdgpu_bo_handle *handles = alloca(sizeof(*handles) * 
cs->num_real_buffers);
-  uint8_t *flags = alloca(sizeof(*flags) * cs->num_real_buffers);
-
-  num_handles = 0;
-  for (i = 0; i < cs->num_real_buffers; ++i) {
- struct amdgpu_cs_buffer *buffer = >real_buffers[i];
-
-if (buffer->bo->is_local)
-continue;
-
- assert(buffer->u.real.priority_usage != 0);
-
- handles[num_handles] = buffer->bo->bo;
- flags[num_handles] = (util_last_bit(buffer->u.real.priority_usage) - 
1) / 2;
-++num_handles;
-  }
-
-  if (num_handles) {
- r = amdgpu_bo_list_create(ws->dev, num_handles,
-   handles, flags, _list);
+  if (use_bo_list_create) {
+ /* Legacy path creating the buffer list handle and passing it to the 
CS ioctl. */
+ r = amdgpu_bo_list_create_raw(ws->dev, num_handles, list, _list);
  if (r) {
 fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
 goto cleanup;
  }
+  } else {
+ /* Standard path passing the buffer list via the CS ioctl. */
+ bo_list_in.operation = ~0;
+ 

Re: [Mesa-dev] [PATCH 08/10] intel/fs: Remove existing lower_conversions pass.

2019-01-07 Thread Francisco Jerez
Iago Toral  writes:

> On Sat, 2018-12-29 at 12:39 -0800, Francisco Jerez wrote:
>> It's redundant with the functionality provided by lower_regioning
>> now.
>> ---
>>  src/intel/Makefile.sources|   1 -
>>  src/intel/compiler/brw_fs.cpp |   1 -
>>  src/intel/compiler/brw_fs.h   |   1 -
>>  .../compiler/brw_fs_lower_conversions.cpp | 132 
>> --
>>  src/intel/compiler/meson.build|   1 -
>>  5 files changed, 136 deletions(-)
>>  delete mode 100644 src/intel/compiler/brw_fs_lower_conversions.cpp
>> 
>> diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
>> index 6b9874d2b80..fe06a57b42e 100644
>> --- a/src/intel/Makefile.sources
>> +++ b/src/intel/Makefile.sources
>> @@ -62,7 +62,6 @@ COMPILER_FILES = \
>>  compiler/brw_fs.h \
>>  compiler/brw_fs_live_variables.cpp \
>>  compiler/brw_fs_live_variables.h \
>> -compiler/brw_fs_lower_conversions.cpp \
>>  compiler/brw_fs_lower_pack.cpp \
>>  compiler/brw_fs_lower_regioning.cpp \
>>  compiler/brw_fs_nir.cpp \
>> diff --git a/src/intel/compiler/brw_fs.cpp
>> b/src/intel/compiler/brw_fs.cpp
>> index caa7a798332..d6280d558ec 100644
>> --- a/src/intel/compiler/brw_fs.cpp
>> +++ b/src/intel/compiler/brw_fs.cpp
>> @@ -6472,7 +6472,6 @@ fs_visitor::optimize()
>> }
>>  
>> progress = false;
>> -   OPT(lower_conversions);
>> OPT(lower_regioning);
>> if (progress) {
>>OPT(opt_copy_propagation);
>
> If you didn't do this in the previous patch, then maybe do it here:
>
> if (OPT(lower_regioning)) {
>...
> }
>
> and avoid resetting progress.
>

I left this lying around because there is another legalization pass
coming up that should cause the same post-lowering optimization passes
to be executed if progress is made.  I can clean things up though if you
like, and re-introduce the reset of the progress flag in the future
commit.

>> diff --git a/src/intel/compiler/brw_fs.h
>> b/src/intel/compiler/brw_fs.h
>> index 36825754931..7edaa3af43c 100644
>> --- a/src/intel/compiler/brw_fs.h
>> +++ b/src/intel/compiler/brw_fs.h
>> @@ -165,7 +165,6 @@ public:
>> bool lower_load_payload();
>> bool lower_pack();
>> bool lower_regioning();
>> -   bool lower_conversions();
>> bool lower_logical_sends();
>> bool lower_integer_multiplication();
>> bool lower_minmax();
>> diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp
>> b/src/intel/compiler/brw_fs_lower_conversions.cpp
>> deleted file mode 100644
>> index 145fb55f995..000
>> --- a/src/intel/compiler/brw_fs_lower_conversions.cpp
>> +++ /dev/null
>> @@ -1,132 +0,0 @@
>> -/*
>> - * Copyright © 2015 Connor Abbott
>> - *
>> - * Permission is hereby granted, free of charge, to any person
>> obtaining a
>> - * copy of this software and associated documentation files (the
>> "Software"),
>> - * to deal in the Software without restriction, including without
>> limitation
>> - * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> - * and/or sell copies of the Software, and to permit persons to whom
>> the
>> - * Software is furnished to do so, subject to the following
>> conditions:
>> - *
>> - * The above copyright notice and this permission notice (including
>> the next
>> - * paragraph) shall be included in all copies or substantial
>> portions of the
>> - * Software.
>> - *
>> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
>> EVENT SHALL
>> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
>> OR OTHER
>> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> ARISING
>> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> OTHER DEALINGS
>> - * IN THE SOFTWARE.
>> - */
>> -
>> -#include "brw_fs.h"
>> -#include "brw_cfg.h"
>> -#include "brw_fs_builder.h"
>> -
>> -using namespace brw;
>> -
>> -static bool
>> -supports_type_conversion(const fs_inst *inst) {
>> -   switch (inst->opcode) {
>> -   case BRW_OPCODE_MOV:
>> -   case SHADER_OPCODE_MOV_INDIRECT:
>> -  return true;
>> -   case BRW_OPCODE_SEL:
>> -  return inst->dst.type == get_exec_type(inst);
>> -   default:
>> -  /* FIXME: We assume the opcodes don't explicitly mentioned
>> -   * before just work fine with arbitrary conversions.
>> -   */
>> -  return true;
>> -   }
>> -}
>> -
>> -/* From the SKL PRM Vol 2a, "Move":
>> - *
>> - *"A mov with the same source and destination type, no source
>> modifier,
>> - * and no saturation is a raw move. A packed byte destination
>> region (B
>> - * or UB type with HorzStride == 1 and ExecSize > 1) can only be
>> written
>> - * using raw move."
>> - */
>> -static bool
>> -is_byte_raw_mov (const fs_inst *inst)
>> -{
>> -   return type_sz(inst->dst.type) == 1 

Re: [Mesa-dev] [PATCH] editorconfig: Add max_line_length property

2019-01-07 Thread Dylan Baker
Quoting Andres Gomez (2019-01-07 05:49:00)
> The property is supported by most editors, but not all:
> https://github.com/editorconfig/editorconfig/wiki/EditorConfig-Properties#max_line_length
> 
> Cc: Eric Engestrom 
> Cc: Eric Anholt 
> Signed-off-by: Andres Gomez 
> ---
>  .editorconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/.editorconfig b/.editorconfig
> index 06848f68c08..c2b772da6c1 100644
> --- a/.editorconfig
> +++ b/.editorconfig
> @@ -11,6 +11,7 @@ tab_width = 8
>  [*.{c,h,cpp,hpp,cc,hh}]
>  indent_style = space
>  indent_size = 3
> +max_line_length = 78
>  
>  [{Makefile*,*.mk}]
>  indent_style = tab
> -- 
> 2.18.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

I think the big ones are covered here: vim, emacs, atom, intellij (and
derivatives), resharper, rider. That seems like it covers pretty much everyone.

Reviewed-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/12] nir: add bit_size parameter to system values with multiple allowed bit sizes

2019-01-07 Thread Jason Ekstrand
First off, thank you very much for cleaning this up!

On Tue, Dec 4, 2018 at 12:27 PM Karol Herbst  wrote:

> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/nir/nir_builder_opcodes_h.py | 14 --
>  src/compiler/nir/nir_lower_system_values.c|  4 ++--
>  src/gallium/drivers/vc4/vc4_nir_lower_blend.c |  4 ++--
>  3 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/src/compiler/nir/nir_builder_opcodes_h.py
> b/src/compiler/nir/nir_builder_opcodes_h.py
> index 34b8c4371e1..f2e33071c6d 100644
> --- a/src/compiler/nir/nir_builder_opcodes_h.py
> +++ b/src/compiler/nir/nir_builder_opcodes_h.py
> @@ -44,13 +44,14 @@ nir_${name}(nir_builder *build,
> ${src_decl_list(opcode.num_inputs)})
>
>  /* Generic builder for system values. */
>  static inline nir_ssa_def *
> -nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
> +nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
> +  unsigned bit_size)
>  {
> nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader,
> op);
> load->num_components = nir_intrinsic_infos[op].dest_components;
> load->const_index[0] = index;
> nir_ssa_dest_init(&load->instr, &load->dest,
> - nir_intrinsic_infos[op].dest_components, 32, NULL);
> + nir_intrinsic_infos[op].dest_components, bit_size,
> NULL);
> nir_builder_instr_insert(build, &load->instr);
> return &load->dest.ssa;
>  }
> @@ -60,6 +61,8 @@ def sysval_decl_list(opcode):
> res = ''
> if opcode.indices:
>res += ', unsigned ' + opcode.indices[0].lower()
> +   if len(opcode.bit_sizes) > 1:
> +  res += ', unsigned bit_size'
>

This if statement is out-of-sync with the one below.  Perhaps
"len(opcode.bit_sizes) != 1" instead?  It may also be good to add "assert
len(opcode.bit_sizes) > 0" somewhere to make it clear what our assumptions
are.


> return res
>
>  def sysval_arg_list(opcode):
> @@ -68,6 +71,13 @@ def sysval_arg_list(opcode):
>args.append(opcode.indices[0].lower())
> else:
>args.append('0')
> +
> +   if len(opcode.bit_sizes) == 1:
> +  bit_size = opcode.bit_sizes[0]
> +  args.append(str(bit_size))
> +   else:
> +  args.append('bit_size')
> +
> return ', '.join(args)
>  %>
>
> diff --git a/src/compiler/nir/nir_lower_system_values.c
> b/src/compiler/nir/nir_lower_system_values.c
> index 08a9e8be44a..68b0ea89c8d 100644
> --- a/src/compiler/nir/nir_lower_system_values.c
> +++ b/src/compiler/nir/nir_lower_system_values.c
> @@ -261,8 +261,8 @@ convert_block(nir_block *block, nir_builder *b)
>if (sysval == NULL) {
>   nir_intrinsic_op sysval_op =
>  nir_intrinsic_from_system_value(var->data.location);
> - sysval = nir_load_system_value(b, sysval_op, 0);
> - sysval->bit_size = load_deref->dest.ssa.bit_size;
> + sysval = nir_load_system_value(b, sysval_op, 0,
> +load_deref->dest.ssa.bit_size);
>

This is so gross I'm happy to see it gone!


>}
>
>nir_ssa_def_rewrite_uses(_deref->dest.ssa,
> nir_src_for_ssa(sysval));
> diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
> b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
> index 60eccb4fc00..f80558722a1 100644
> --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
> +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
> @@ -130,7 +130,7 @@ vc4_blend_channel_f(nir_builder *b,
>  return nir_load_system_value(b,
>
> nir_intrinsic_load_blend_const_color_r_float +
>   channel,
> - 0);
> + 0, 32);
>  case PIPE_BLENDFACTOR_CONST_ALPHA:
>  return nir_load_blend_const_color_a_float(b);
>  case PIPE_BLENDFACTOR_ZERO:
> @@ -148,7 +148,7 @@ vc4_blend_channel_f(nir_builder *b,
>  nir_load_system_value(b,
>
>  nir_intrinsic_load_blend_const_color_r_float +
>channel,
> -  0));
> +  0, 32));
>  case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
>  return nir_fsub(b, nir_imm_float(b, 1.0),
>  nir_load_blend_const_color_a_float(b));
> --
> 2.19.2
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/12] nir: add support for address bit sized system values

2019-01-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 12:27 PM Karol Herbst  wrote:

> Signed-off-by: Karol Herbst 
> ---
>  src/amd/vulkan/radv_meta_buffer.c  |  8 ++--
>  src/amd/vulkan/radv_meta_bufimage.c| 28 +++---
>  src/amd/vulkan/radv_meta_clear.c   |  4 +-
>  src/amd/vulkan/radv_meta_fast_clear.c  |  4 +-
>  src/amd/vulkan/radv_meta_resolve_cs.c  |  4 +-
>  src/amd/vulkan/radv_query.c| 12 +++---
>  src/compiler/nir/nir_intrinsics.py | 12 +++---
>  src/compiler/nir/nir_lower_system_values.c | 43 +-
>  8 files changed, 61 insertions(+), 54 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_buffer.c
> b/src/amd/vulkan/radv_meta_buffer.c
> index 76854d7bbad..208988c3775 100644
> --- a/src/amd/vulkan/radv_meta_buffer.c
> +++ b/src/amd/vulkan/radv_meta_buffer.c
> @@ -15,8 +15,8 @@ build_buffer_fill_shader(struct radv_device *dev)
> b.shader->info.cs.local_size[1] = 1;
> b.shader->info.cs.local_size[2] = 1;
>
> -   nir_ssa_def *invoc_id = nir_load_local_invocation_id();
> -   nir_ssa_def *wg_id = nir_load_work_group_id();
> +   nir_ssa_def *invoc_id = nir_load_local_invocation_id(, 32);
> +   nir_ssa_def *wg_id = nir_load_work_group_id(, 32);
> nir_ssa_def *block_size = nir_imm_ivec4(,
>
> b.shader->info.cs.local_size[0],
>
> b.shader->info.cs.local_size[1],
> @@ -67,8 +67,8 @@ build_buffer_copy_shader(struct radv_device *dev)
> b.shader->info.cs.local_size[1] = 1;
> b.shader->info.cs.local_size[2] = 1;
>
> -   nir_ssa_def *invoc_id = nir_load_local_invocation_id();
> -   nir_ssa_def *wg_id = nir_load_work_group_id();
> +   nir_ssa_def *invoc_id = nir_load_local_invocation_id(, 32);
> +   nir_ssa_def *wg_id = nir_load_work_group_id(, 32);
> nir_ssa_def *block_size = nir_imm_ivec4(,
>
> b.shader->info.cs.local_size[0],
>
> b.shader->info.cs.local_size[1],
> diff --git a/src/amd/vulkan/radv_meta_bufimage.c
> b/src/amd/vulkan/radv_meta_bufimage.c
> index 45df8438234..c8a733b3062 100644
> --- a/src/amd/vulkan/radv_meta_bufimage.c
> +++ b/src/amd/vulkan/radv_meta_bufimage.c
> @@ -60,8 +60,8 @@ build_nir_itob_compute_shader(struct radv_device *dev,
> bool is_3d)
> output_img->data.descriptor_set = 0;
> output_img->data.binding = 1;
>
> -   nir_ssa_def *invoc_id = nir_load_local_invocation_id();
> -   nir_ssa_def *wg_id = nir_load_work_group_id();
> +   nir_ssa_def *invoc_id = nir_load_local_invocation_id(, 32);
> +   nir_ssa_def *wg_id = nir_load_work_group_id(, 32);
> nir_ssa_def *block_size = nir_imm_ivec4(,
>
> b.shader->info.cs.local_size[0],
>
> b.shader->info.cs.local_size[1],
> @@ -289,8 +289,8 @@ build_nir_btoi_compute_shader(struct radv_device *dev,
> bool is_3d)
> output_img->data.descriptor_set = 0;
> output_img->data.binding = 1;
>
> -   nir_ssa_def *invoc_id = nir_load_local_invocation_id();
> -   nir_ssa_def *wg_id = nir_load_work_group_id();
> +   nir_ssa_def *invoc_id = nir_load_local_invocation_id(, 32);
> +   nir_ssa_def *wg_id = nir_load_work_group_id(, 32);
> nir_ssa_def *block_size = nir_imm_ivec4(,
>
> b.shader->info.cs.local_size[0],
>
> b.shader->info.cs.local_size[1],
> @@ -511,8 +511,8 @@ build_nir_btoi_r32g32b32_compute_shader(struct
> radv_device *dev)
> output_img->data.descriptor_set = 0;
> output_img->data.binding = 1;
>
> -   nir_ssa_def *invoc_id = nir_load_local_invocation_id();
> -   nir_ssa_def *wg_id = nir_load_work_group_id();
> +   nir_ssa_def *invoc_id = nir_load_local_invocation_id(, 32);
> +   nir_ssa_def *wg_id = nir_load_work_group_id(, 32);
> nir_ssa_def *block_size = nir_imm_ivec4(,
>
> b.shader->info.cs.local_size[0],
>
> b.shader->info.cs.local_size[1],
> @@ -719,8 +719,8 @@ build_nir_itoi_compute_shader(struct radv_device *dev,
> bool is_3d)
> output_img->data.descriptor_set = 0;
> output_img->data.binding = 1;
>
> -   nir_ssa_def *invoc_id = nir_load_local_invocation_id();
> -   nir_ssa_def *wg_id = nir_load_work_group_id();
> +   nir_ssa_def *invoc_id = nir_load_local_invocation_id(, 32);
> +   nir_ssa_def *wg_id = nir_load_work_group_id(, 32);
> nir_ssa_def *block_size = nir_imm_ivec4(,
>
> b.shader->info.cs.local_size[0],
>
> b.shader->info.cs.local_size[1],
> @@ -932,8 +932,8 @@ build_nir_itoi_r32g32b32_compute_shader(struct
> radv_device *dev)
> output_img->data.descriptor_set = 0;
> output_img->data.binding = 1;
>
> -   nir_ssa_def *invoc_id = nir_load_local_invocation_id();
> -   nir_ssa_def *wg_id = nir_load_work_group_id();
> +   nir_ssa_def *invoc_id = nir_load_local_invocation_id(, 32);
> +   nir_ssa_def *wg_id = nir_load_work_group_id(, 32);
> nir_ssa_def *block_size = nir_imm_ivec4(,
>
> b.shader->info.cs.local_size[0],
>
> b.shader->info.cs.local_size[1],
> @@ -1139,8 +1139,8 @@ 

Re: [Mesa-dev] [PATCH 03/10] intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split.

2019-01-07 Thread Francisco Jerez
Iago Toral  writes:

> On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
>> This seems to be a problem in combination with the lower_regioning
>> pass introduced by a future commit, which can modify a SIMD-split
>> instruction causing its execution size to become illegal again.  A
>> subsequent call to lower_simd_width() would hit this bug on a future
>> platform.
>> 
>> Cc: mesa-sta...@lists.freedesktop.org
>> ---
>>  src/intel/compiler/brw_fs.cpp | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>> 
>> diff --git a/src/intel/compiler/brw_fs.cpp
>> b/src/intel/compiler/brw_fs.cpp
>> index 97544fdf465..4aacc72a1b7 100644
>> --- a/src/intel/compiler/brw_fs.cpp
>> +++ b/src/intel/compiler/brw_fs.cpp
>> @@ -5666,7 +5666,7 @@ static fs_reg
>>  emit_unzip(const fs_builder , fs_inst *inst, unsigned i)
>>  {
>> /* Specified channel group from the source region. */
>> -   const fs_reg src = horiz_offset(inst->src[i], lbld.group());
>> +   const fs_reg src = horiz_offset(inst->src[i], lbld.group() -
>> inst->group);
>
> Should we assert that lbld.group >= inst->group? Same below.
>

The IR will fail validation anytime that's not the case.  But I can add
the assertions in both places if that makes you feel more comfortable.

>> if (needs_src_copy(lbld, inst, i)) {
>>/* Builder of the right width to perform the copy avoiding
>> uninitialized
>> @@ -5757,7 +5757,7 @@ emit_zip(const fs_builder _before, const
>> fs_builder _after,
>> assert(lbld_before.group() == lbld_after.group());
>>  
>> /* Specified channel group from the destination region. */
>> -   const fs_reg dst = horiz_offset(inst->dst, lbld_after.group());
>> +   const fs_reg dst = horiz_offset(inst->dst, lbld_after.group() -
>> inst->group);
>> const unsigned dst_size = inst->size_written /
>>inst->dst.component_size(inst->exec_size);
>>  


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] clover/meson: Ignore 'svn' suffix when computing CLANG_RESOURCE_DIR

2019-01-07 Thread Dylan Baker
Quoting Pierre Moreau (2019-01-07 12:30:17)
> The version exported by LLVM in its CMake configuration files can
> include the “svn” suffix when building a development version (for
> example “8.0.0svn”). However the exported clang headers are still
> found under “lib/clang/8.0.0/”, without the “svn” suffix.
> Meson takes care of removing the “svn” suffix from the version when
> using the dependency’s `version()` method.
> 
> This processing is already performed in “configure.ac” when using
> autotools.
> 
> Signed-off-by: Pierre Moreau 
> ---
> v2:
> * Replace the initial solution using a `split()`, by instead directly
>   using Meson’s own `version()` function. (Dylan Baker)
> * Update the commit message to mention the `version()` method.
> 
>  src/gallium/state_trackers/clover/meson.build | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/state_trackers/clover/meson.build 
> b/src/gallium/state_trackers/clover/meson.build
> index a6729af2fb8..62ac5f5278d 100644
> --- a/src/gallium/state_trackers/clover/meson.build
> +++ b/src/gallium/state_trackers/clover/meson.build
> @@ -53,7 +53,7 @@ libclllvm = static_library(
>  
> '-DLIBCLC_LIBEXECDIR="@0@/"'.format(dep_clc.get_pkgconfig_variable('libexecdir')),
>  '-DCLANG_RESOURCE_DIR="@0@"'.format(join_paths(
>dep_llvm.get_configtool_variable('libdir'), 'clang',
> -  dep_llvm.get_configtool_variable('version'), 'include',
> +  dep_llvm.version(), 'include',
>  )),
>],
>dependencies : [dep_llvm, dep_elf],
> -- 
> 2.20.1
> 

Reviewed-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radv: add support for VK_EXT_memory_budget

2019-01-07 Thread Samuel Pitoiset


On 1/7/19 6:06 PM, Alex Smith wrote:

Hi Samuel,

Thanks for implementing this - I've been wanting this extension for a 
while so it's good it's finally available.


This is just reporting the total heap sizes as the budget, which is 
the same info we already get from the basic heap properties. The way 
I'd expected budget to work (and what the spec is saying as far as I 
can see) is that it's an estimate of how much is available for the 
calling app to use in that heap at the time of the call, so should 
account for current system-wide usage of the heap by other apps. 
Shouldn't this be something like (heap size - system wide usage of the 
heap + current app usage of the heap)? (+ app usage since the spec 
says budget includes currently allocated device memory)


Hi Alex,

Yes, I was also wondering about that. We can add per-process counters 
for VRAM and GTT heaps, but I don't see how we can be accurate for the 
visible VRAM heap.


As said in the commit description, that implementation is really 
inaccurate. Though if you need something better, I can improve it.


Note that I agree with you about the spec.



Alex

On Mon, 7 Jan 2019 at 16:35, Samuel Pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:


A simple Vulkan extension that allows apps to query size and
usage of all exposed memory heaps.

The different usage values are not really accurate because
they are per drm-fd, but they should be close enough.

Signed-off-by: Samuel Pitoiset mailto:samuel.pitoi...@gmail.com>>
---
 src/amd/vulkan/radv_device.c      | 44
+++
 src/amd/vulkan/radv_extensions.py |  1 +
 2 files changed, 45 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c
b/src/amd/vulkan/radv_device.c
index cef3a430555..32eaeb3b226 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1352,12 +1352,56 @@ void radv_GetPhysicalDeviceMemoryProperties(
        *pMemoryProperties = physical_device->memory_properties;
 }

+static void
+radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
+  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
+{
+       RADV_FROM_HANDLE(radv_physical_device, device,
physicalDevice);
+       VkPhysicalDeviceMemoryProperties *memory_properties =
>memory_properties;
+       uint64_t visible_vram_size =
radv_get_visible_vram_size(device);
+       uint64_t vram_size = radv_get_vram_size(device);
+       uint64_t gtt_size = device->rad_info.gart_size;
+
+       if (vram_size) {
+  memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = vram_size;
+               memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] =
+  device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
+       }
+
+       if (visible_vram_size) {
+  memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] =
visible_vram_size;
+  memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] =
+  device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
+       }
+
+       if (gtt_size) {
+               memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] =
gtt_size;
+               memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] =
+  device->ws->query_value(device->ws, RADEON_GTT_USAGE);
+       }
+
+       /* The heapBudget and heapUsage values must be zero for
array elements
+        * greater than or equal to
+        * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
+        */
+       for (uint32_t i = memory_properties->memoryHeapCount; i <
VK_MAX_MEMORY_HEAPS; i++) {
+               memoryBudget->heapBudget[i] = 0;
+               memoryBudget->heapUsage[i] = 0;
+       }
+}
+
 void radv_GetPhysicalDeviceMemoryProperties2(
        VkPhysicalDevice physicalDevice,
        VkPhysicalDeviceMemoryProperties2KHR  *pMemoryProperties)
 {
        radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
 >memoryProperties);
+
+       VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
+               vk_find_struct(pMemoryProperties->pNext,
+ PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
+       if (memory_budget)
+  radv_get_memory_budget_properties(physicalDevice, memory_budget);
 }

 VkResult radv_GetMemoryHostPointerPropertiesEXT(
diff --git a/src/amd/vulkan/radv_extensions.py
b/src/amd/vulkan/radv_extensions.py
index 9952bb9c1c6..491ed9d94c3 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -105,6 +105,7 @@ EXTENSIONS = [
     Extension('VK_EXT_external_memory_dma_buf',           1, True),
     Extension('VK_EXT_external_memory_host',              1,
'device->rad_info.has_userptr'),
     Extension('VK_EXT_global_priority',                   1,
'device->rad_info.has_ctx_priority'),
+    

Re: [Mesa-dev] [PATCH 1/2] spirv: Sort supported capabilities

2019-01-07 Thread Caio Marcelo de Oliveira Filho
This and the other patch are

Reviewed-by: Caio Marcelo de Oliveira Filho 



On Mon, Jan 07, 2019 at 10:53:09AM -0600, Jason Ekstrand wrote:
> ---
>  src/amd/vulkan/radv_shader.c| 24 -
>  src/compiler/shader_info.h  | 34 -
>  src/intel/vulkan/anv_pipeline.c | 18 ++---
>  src/mesa/drivers/dri/i965/brw_context.c | 10 
>  4 files changed, 43 insertions(+), 43 deletions(-)
> 
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 7ad9abe8df8..7220738c7c8 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -220,32 +220,32 @@ radv_shader_compile_to_nir(struct radv_device *device,
>   }
>   const struct spirv_to_nir_options spirv_options = {
>   .caps = {
> + .descriptor_array_dynamic_indexing = true,
>   .device_group = true,
>   .draw_parameters = true,
>   .float64 = true,
> + .gcn_shader = true,
> + .geometry_streams = true,
>   .image_read_without_format = true,
>   .image_write_without_format = true,
> - .tessellation = true,
> - .int64 = true,
>   .int16 = true,
> + .int64 = true,
>   .multiview = true,
> + .runtime_descriptor_array = true,
> + .shader_viewport_index_layer = true,
> + .stencil_export = true,
> + .storage_16bit = true,
> + .storage_image_ms = true,
>   .subgroup_arithmetic = true,
>   .subgroup_ballot = true,
>   .subgroup_basic = true,
>   .subgroup_quad = true,
>   .subgroup_shuffle = true,
>   .subgroup_vote = true,
> - .variable_pointers = true,
> - .gcn_shader = true,
> - .trinary_minmax = true,
> - .shader_viewport_index_layer = true,
> - .descriptor_array_dynamic_indexing = true,
> - .runtime_descriptor_array = true,
> - .stencil_export = true,
> - .storage_16bit = true,
> - .geometry_streams = true,
> + .tessellation = true,
>   .transform_feedback = true,
> - .storage_image_ms = true,
> + .trinary_minmax = true,
> + .variable_pointers = true,
>   },
>   };
>   entry_point = spirv_to_nir(spirv, module->size / 4,
> diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
> index 05f37c8d197..dc47cd7656d 100644
> --- a/src/compiler/shader_info.h
> +++ b/src/compiler/shader_info.h
> @@ -33,38 +33,38 @@ extern "C" {
>  #endif
>  
>  struct spirv_supported_capabilities {
> -   bool float64;
> -   bool image_ms_array;
> -   bool tessellation;
> +   bool atomic_storage;
> +   bool descriptor_array_dynamic_indexing;
> bool device_group;
> bool draw_parameters;
> +   bool float64;
> +   bool geometry_streams;
> +   bool gcn_shader;
> +   bool image_ms_array;
> bool image_read_without_format;
> bool image_write_without_format;
> +   bool int16;
> bool int64;
> +   bool int64_atomics;
> bool min_lod;
> bool multiview;
> -   bool variable_pointers;
> -   bool storage_16bit;
> -   bool int16;
> +   bool post_depth_coverage;
> +   bool runtime_descriptor_array;
> bool shader_viewport_index_layer;
> +   bool stencil_export;
> +   bool storage_8bit;
> +   bool storage_16bit;
> +   bool storage_image_ms;
> bool subgroup_arithmetic;
> bool subgroup_ballot;
> bool subgroup_basic;
> bool subgroup_quad;
> bool subgroup_shuffle;
> bool subgroup_vote;
> -   bool gcn_shader;
> -   bool trinary_minmax;
> -   bool descriptor_array_dynamic_indexing;
> -   bool runtime_descriptor_array;
> -   bool stencil_export;
> -   bool atomic_storage;
> -   bool storage_8bit;
> -   bool post_depth_coverage;
> +   bool tessellation;
> bool transform_feedback;
> -   bool geometry_streams;
> -   bool int64_atomics;
> -   bool storage_image_ms;
> +   bool trinary_minmax;
> +   bool variable_pointers;
>  };
>  
>  typedef struct shader_info {
> diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
> index 6db9945e0d4..a9ae379967b 100644
> --- 

Re: [Mesa-dev] [PATCH 12/12] nir/spirv: handle ContractionOff execution mode

2019-01-07 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 12:27 PM Karol Herbst  wrote:

> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/spirv/spirv_info.h| 1 +
>  src/compiler/spirv/spirv_info_c.py | 1 +
>  src/compiler/spirv/spirv_to_nir.c  | 9 -
>  src/compiler/spirv/vtn_alu.c   | 4 ++--
>  src/compiler/spirv/vtn_cfg.c   | 2 ++
>  src/compiler/spirv/vtn_private.h   | 3 +++
>  6 files changed, 17 insertions(+), 3 deletions(-)
>
> diff --git a/src/compiler/spirv/spirv_info.h
> b/src/compiler/spirv/spirv_info.h
> index 121ffd2febb..a03c2ef8eb0 100644
> --- a/src/compiler/spirv/spirv_info.h
> +++ b/src/compiler/spirv/spirv_info.h
> @@ -28,6 +28,7 @@
>
>  const char *spirv_capability_to_string(SpvCapability cap);
>  const char *spirv_decoration_to_string(SpvDecoration dec);
> +const char *spirv_executionmode_to_string(SpvExecutionMode mode);
>  const char *spirv_op_to_string(SpvOp op);
>
>  #endif /* SPIRV_INFO_H */
> diff --git a/src/compiler/spirv/spirv_info_c.py
> b/src/compiler/spirv/spirv_info_c.py
> index ff7942bcd3a..6880d3e329d 100644
> --- a/src/compiler/spirv/spirv_info_c.py
> +++ b/src/compiler/spirv/spirv_info_c.py
> @@ -90,6 +90,7 @@ if __name__ == "__main__":
>  info = [
>  collect_data(spirv_info, "Capability"),
>  collect_data(spirv_info, "Decoration"),
> +collect_data(spirv_info, "ExecutionMode"),
>  collect_opcodes(spirv_info),
>  ]
>
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 1a7d5b3a9bd..488c61cf1c6 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -3771,9 +3771,16 @@ vtn_handle_execution_mode(struct vtn_builder *b,
> struct vtn_value *entry_point,
>break;
>
> case SpvExecutionModeVecTypeHint:
> -   case SpvExecutionModeContractionOff:
>break; /* OpenCL */
>
> +   case SpvExecutionModeContractionOff:
> +  if (b->shader->info.stage != MESA_SHADER_KERNEL)
> + vtn_warn("ExectionMode only allowed for CL-style kernels: %s",
> +  spirv_executionmode_to_string(mode->exec_mode));
> +  else
> + b->exact = true;
> +  break;
> +
> case SpvExecutionModeStencilRefReplacingEXT:
>vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
>break;
> diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
> index dc6fedc9129..f910630acfb 100644
> --- a/src/compiler/spirv/vtn_alu.c
> +++ b/src/compiler/spirv/vtn_alu.c
> @@ -395,7 +395,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
> if (glsl_type_is_matrix(vtn_src[0]->type) ||
> (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) {
>vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]);
> -  b->nb.exact = false;
> +  b->nb.exact = b->exact;
>return;
> }
>
> @@ -661,5 +661,5 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
> } /* default */
> }
>
> -   b->nb.exact = false;
> +   b->nb.exact = b->exact;
>  }
> diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c
> index 5b3cc703f94..bc1a949fdee 100644
> --- a/src/compiler/spirv/vtn_cfg.c
> +++ b/src/compiler/spirv/vtn_cfg.c
> @@ -281,6 +281,7 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder
> *b, SpvOp opcode,
>b->func->impl = nir_function_impl_create(func);
>nir_builder_init(>nb, func->impl);
>b->nb.cursor = nir_before_cf_list(>func->impl->body);
> +  b->nb.exact = b->exact;
>
>b->func_param_idx = 0;
>
> @@ -1040,6 +1041,7 @@ vtn_function_emit(struct vtn_builder *b, struct
> vtn_function *func,
> nir_builder_init(>nb, func->impl);
> b->func = func;
> b->nb.cursor = nir_after_cf_list(>impl->body);
> +   b->nb.exact = b->exact;
> b->has_loop_continue = false;
> b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
>_mesa_key_pointer_equal);
> diff --git a/src/compiler/spirv/vtn_private.h
> b/src/compiler/spirv/vtn_private.h
> index e380d8e82ff..930beec30ef 100644
> --- a/src/compiler/spirv/vtn_private.h
> +++ b/src/compiler/spirv/vtn_private.h
> @@ -607,6 +607,9 @@ struct vtn_builder {
>
> bool has_loop_continue;
>
> +   /* false by default, set to true by the ContractionOff execution mode
> */
> +   bool exact;
> +
> /* when a physical memory model is choosen */
> bool physical_ptrs;
>  };
> --
> 2.19.2
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] clover/meson: Ignore 'svn' suffix when computing CLANG_RESOURCE_DIR

2019-01-07 Thread Dylan Baker
Quoting Pierre Moreau (2019-01-06 05:06:04)
> The version exported by LLVM in its CMake configuration files can include
> the “svn” suffix when building a development version (for example
> “8.0.0svn”). However the exported clang headers are still found
> under “lib/clang/8.0.0/”, without the “svn” suffix.
> This processing is already performed in “configure.ac” when using
> autotools.
> 
> Signed-off-by: Pierre Moreau 
> ---
> I am not that familiar with Meson yet and couldn’t find a better way to
> extract the version numbers from the version string than splitting the
> version string on the ‘s’ character.
> 
>  src/gallium/state_trackers/clover/meson.build | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/state_trackers/clover/meson.build 
> b/src/gallium/state_trackers/clover/meson.build
> index a6729af2fb8..1985c7583fb 100644
> --- a/src/gallium/state_trackers/clover/meson.build
> +++ b/src/gallium/state_trackers/clover/meson.build
> @@ -53,7 +53,7 @@ libclllvm = static_library(
>  
> '-DLIBCLC_LIBEXECDIR="@0@/"'.format(dep_clc.get_pkgconfig_variable('libexecdir')),
>  '-DCLANG_RESOURCE_DIR="@0@"'.format(join_paths(
>dep_llvm.get_configtool_variable('libdir'), 'clang',
> -  dep_llvm.get_configtool_variable('version'), 'include',
> +  dep_llvm.get_configtool_variable('version').split('s')[0], 'include',

I think you can just use dep_llvm.version() now, which does split the 'svn' or
'git' off the end already. When this was written it didn't do that.

>  )),
>],
>dependencies : [dep_llvm, dep_elf],
> -- 
> 2.20.1
> 

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109021] Kaveri no fix perfomance

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109021

Bas Nieuwenhuizen  changed:

   What|Removed |Added

 Resolution|--- |NOTABUG
 Status|NEEDINFO|RESOLVED

--- Comment #7 from Bas Nieuwenhuizen  ---
Unless you have numbers showing that for GL it actually improves performance
for some applications, I'm not going to spend time on this.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109021] Kaveri no fix perfomance

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109021

--- Comment #8 from Dmitry  ---
OK. Then close.

Hopefully pay attention to another important bug report.
https://bugs.freedesktop.org/show_bug.cgi?id=109015

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109242] [RADV] The Witcher 3 system freeze

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109242

--- Comment #2 from Dmitry  ---
Before that, there were no heavy loads, and hangs during active combat.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109140] [KBL-G][GL] KHR-GL43.compute_shader.max test failed

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109140

Hai  changed:

   What|Removed |Added

 QA Contact|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop.
   |.org|org
   Assignee|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop.
   |.org|org
   Hardware|Other   |x86-64 (AMD64)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109021] Kaveri no fix perfomance

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109021

--- Comment #9 from Dmitry  ---
More precisely this, sorry.
https://bugs.freedesktop.org/show_bug.cgi?id=109017

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/16] nir: improve convert_yuv_to_rgb when fuse_ffma=true

2019-01-07 Thread Lionel Landwerlin
I did not, but then I saw that someone pointed out an issue with this particular 
patch.

I can do it tomorrow.
Do you have a link to the updated series?

Thanks,

-
Lionel

On 07/01/2019 16:54, Jonathan Marek wrote:

Hi,

Did you get a chance to try this? If not, I might be able to try it 
myself as I have Intel HW.


On 12/19/18 12:34 PM, Lionel Landwerlin wrote:

Hey Jonathan,

I'm kind of curious as to whether we can have a single expression 
that pretty much generates the same final code (through some of the 
algebraic lowering/optimizations).

I'll give it a try on Intel HW, see what it does.

-
Lionel

On 19/12/2018 16:39, Jonathan Marek wrote:
When ffma is available, we can use a different arrangement of 
constants to

get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7
scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 
ffma.


Signed-off-by: Jonathan Marek 
---
  src/compiler/nir/nir_lower_tex.c | 62 
++--

  1 file changed, 43 insertions(+), 19 deletions(-)

diff --git a/src/compiler/nir/nir_lower_tex.c 
b/src/compiler/nir/nir_lower_tex.c

index 6a6b6c41a7..f7c821bb34 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, 
nir_tex_instr *tex,

 nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
 nir_ssa_def *a)
  {
-   nir_const_value m[3] = {
-  { .f32 = { 1.0f,  0.0f, 1.59602678f, 0.0f } },
-  { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
-  { .f32 = { 1.0f,  2.01723214f,  0.0f,    0.0f } }
-   };
-
-   nir_ssa_def *yuv =
-  nir_vec4(b,
-   nir_fmul(b, nir_imm_float(b, 1.16438356f),
-    nir_fadd(b, y, nir_imm_float(b, -16.0f / 
255.0f))),
-   nir_channel(b, nir_fadd(b, u, nir_imm_float(b, 
-128.0f / 255.0f)), 0),
-   nir_channel(b, nir_fadd(b, v, nir_imm_float(b, 
-128.0f / 255.0f)), 0),

-   nir_imm_float(b, 0.0));
-
-   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[0]));
-   nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[1]));
-   nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[2]));

-
-   nir_ssa_def *result = nir_vec4(b, red, green, blue, a);
+   nir_ssa_def *result;
+
+
+   if (b->shader->options->fuse_ffma) {
+  nir_const_value m[4] = {
+ { .f32 = { 1.16438356f, 1.16438356f, 1.16438356f, 0.0f } },
+ { .f32 = { 0.0f,   -0.39176229f, 2.01723214f, 0.0f } },
+ { .f32 = { 1.59602678f,-0.81296764f, 0.0f, 0.0f } },
+  };
+  static const float y_off = -16.0f * 1.16438356f / 255.0f;
+  static const float sc = 128.0f / 255.0f;
+
+  nir_ssa_def *offset =
+ nir_vec4(b,
+  nir_imm_float(b, y_off - sc * 1.59602678f),
+  nir_imm_float(b, y_off + sc * (0.81296764f + 
0.39176229f)),

+  nir_imm_float(b, y_off - sc * 2.01723214f),
+  a);
+
+  result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]),
+   nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]),
+    nir_ffma(b, v, nir_build_imm(b, 4, 
32, m[2]), offset)));

+   } else {
+  nir_const_value m[3] = {
+ { .f32 = { 1.0f,  0.0f, 1.59602678f, 0.0f } },
+ { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
+ { .f32 = { 1.0f,  2.01723214f,  0.0f,    0.0f } }
+  };
+
+  nir_ssa_def *yuv =
+ nir_vec4(b,
+  nir_fmul(b, nir_imm_float(b, 1.16438356f),
+   nir_fadd(b, y, nir_imm_float(b, -16.0f / 
255.0f))),
+  nir_channel(b, nir_fadd(b, u, nir_imm_float(b, 
-128.0f / 255.0f)), 0),
+  nir_channel(b, nir_fadd(b, v, nir_imm_float(b, 
-128.0f / 255.0f)), 0),

+  nir_imm_float(b, 0.0));
+
+  nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[0]));
+  nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 
32, m[1]));
+  nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[2]));

+
+  result = nir_vec4(b, red, green, blue, a);
+   }
 nir_ssa_def_rewrite_uses(>dest.ssa, nir_src_for_ssa(result));
  }




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: correct typo in GLSL compilation error message

2019-01-07 Thread Timothy Arceri

On 8/1/19 2:51 am, Andres Gomez wrote:

On Mon, 2019-01-07 at 16:48 +0100, Erik Faye-Lund wrote:

On Mon, 2019-01-07 at 15:50 +0200, Andres Gomez wrote:

Correct a typo introduced by
037f68d81e1 ("glsl: apply align layout qualifier rules to block
offsets")

Cc: Timothy Arceri 
Signed-off-by: Andres Gomez 


This should probably have this tag:

Fixes: 037f68d81e1 "glsl: apply align layout qualifier rules to block
 offsets"


Didn't think it was important enough to add the tag which will pull
into the stable releases ...

... it will also be more traceable with it added so, I suppose I will
do so.


Seems fine to go into stable :)

Reviewed-by: Timothy Arceri 

Thanks!




With that added:

Reviewed-by: Erik Faye-Lund 


Thanks! ☺


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109204] [regression, bisected] retroarch's crt-royale shader crash radv

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109204

Bas Nieuwenhuizen  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #15 from Bas Nieuwenhuizen  ---
https://gitlab.freedesktop.org/mesa/mesa/commit/70ed049cc6a682fba084cf57e06728fa5934881b

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109151] [KBL-G][vulkan] dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat failed verification.

2019-01-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109151

Bas Nieuwenhuizen  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #10 from Bas Nieuwenhuizen  ---
https://gitlab.freedesktop.org/mesa/mesa/commit/3cc940277a45285d9932ed62398d7a54d2afffb8

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] meson: fix EGL/X11 build without GLX

2019-01-07 Thread Chia-I Wu
dep_xcb and others were not set under this configuration.

Signed-off-by: Chia-I Wu 
---
 meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 709f77a9c7..601085e055 100644
--- a/meson.build
+++ b/meson.build
@@ -1361,7 +1361,7 @@ if with_platform_x11
 dep_xfixes = dependency('xfixes')
 dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1')
   endif
-  if (with_any_vk or with_glx == 'dri' or
+  if (with_any_vk or with_glx == 'dri' or with_egl or
(with_gallium_vdpau or with_gallium_xvmc or with_gallium_va or
 with_gallium_omx != 'disabled'))
 dep_xcb = dependency('xcb')
-- 
2.20.1.97.g81188d93c3-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir_to_llvm: add missing parentheses in get_inst_tessfactor_writemask()

2019-01-07 Thread Timothy Arceri

On 8/1/19 10:50 am, Bas Nieuwenhuizen wrote:

I think this got fixed by
https://gitlab.freedesktop.org/mesa/mesa/commit/be6cee51c06dc72ac159bd75b4201c61952515bd
already?


ok thanks. Hadn't pulled that one in yet.



On Tue, Jan 8, 2019 at 12:28 AM Timothy Arceri  wrote:


Cc: Marek Olšák 
---
  src/amd/common/ac_nir_to_llvm.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 84dbe17457..c9e11142eb 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4086,9 +4086,9 @@ get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
 unsigned num_comps = intrin->dest.ssa.num_components;

 if (location == VARYING_SLOT_TESS_LEVEL_INNER)
-   writemask = ((1 << num_comps + 1) - 1) << first_component;
+   writemask = ((1 << (num_comps + 1)) - 1) << first_component;
 else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
-   writemask = (((1 << num_comps + 1) - 1) << first_component) << 
4;
+   writemask = (((1 << (num_comps + 1)) - 1) << first_component) 
<< 4;

 return writemask;
  }
--
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [shaderdb 2/2] intel_stub: update stubbing logic for newer mesa

2019-01-07 Thread Lionel Landwerlin
The DRI2 backend of Mesa started relying more on libdrm to open DRM
device nodes and started opening multiple fds on a single node. This change
keeps track of up to 10 fds so that the stub works again.

Signed-off-by: Lionel Landwerlin 
---
 intel_stub.c | 98 +---
 1 file changed, 48 insertions(+), 50 deletions(-)

diff --git a/intel_stub.c b/intel_stub.c
index 8b8db64..9ac8ad3 100644
--- a/intel_stub.c
+++ b/intel_stub.c
@@ -23,9 +23,11 @@
 
 #define _GNU_SOURCE /* for RTLD_NEXT */
 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -37,6 +39,8 @@
 #include 
 #include 
 
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
 static void *(*libc_mmap)(void *addr, size_t len, int prot, int flags,
   int fildes, off_t off);
 static void *(*libc_mmap64)(void *addr, size_t len, int prot, int flags,
@@ -52,7 +56,39 @@ static int (*libc__fxstat64)(int ver, int fd, struct stat64 
*buf);
 static int (*libc_fcntl)(int fd, int cmd, int param);
 static ssize_t (*libc_readlink)(const char *pathname, char *buf, size_t 
bufsiz);
 
-static int drm_fd = 0xBEEF;
+int open64(const char*, int, ...) __attribute__((alias("open")));
+int fcntl64(int fd, int cmd, ...) __attribute__((alias("fcntl")));
+void* mmap64(void*, size_t, int, int, int, off_t) 
__attribute__((alias("mmap")));
+
+static int drm_fds[10];
+static int n_drm_fds = 0;
+
+static int create_drm_fd(int flags, mode_t mode)
+{
+   assert(n_drm_fds < ARRAY_SIZE(drm_fds));
+   drm_fds[n_drm_fds++] = libc_open("/dev/null", flags, mode);
+   return drm_fds[n_drm_fds - 1];
+}
+
+static void remove_drm_fd(int fd)
+{
+   for (int i = 0; i < n_drm_fds; i++) {
+   if (fd == drm_fds[i]) {
+   for (int j = i + 1; j < n_drm_fds; j++)
+   drm_fds[j - 1] = drm_fds[j];
+   n_drm_fds--;
+   return;
+   }
+   }
+}
+
+static bool is_drm_fd(int fd)
+{
+   for (int i = 0; i < n_drm_fds; i++)
+   if (fd == drm_fds[i])
+   return true;
+   return false;
+}
 
 #define DRM_MAJOR 226
 
@@ -89,45 +125,27 @@ open(const char *path, int flags, ...)
va_list args;
mode_t mode;
 
-   if (strcmp(path, "/dev/dri/renderD128") == 0)
-  return drm_fd;
-
va_start(args, flags);
mode = va_arg(args, int);
va_end(args);
 
-   return libc_open(path, flags, mode);
-}
-
-__attribute__ ((visibility ("default"))) int
-open64(const char *path, int flags, ...)
-{
-   va_list args;
-   mode_t mode;
-
if (strcmp(path, "/dev/dri/renderD128") == 0)
-  return drm_fd;
-
-   va_start(args, flags);
-   mode = va_arg(args, int);
-   va_end(args);
+  return create_drm_fd(flags, mode);
 
-   return libc_open64(path, flags, mode);
+   return libc_open(path, flags, mode);
 }
 
 __attribute__ ((visibility ("default"))) int
 close(int fd)
 {
-   if (fd == drm_fd)
-   return 0;
-
+   remove_drm_fd(fd);
return libc_close(fd);
 }
 
 __attribute__ ((visibility ("default"))) int
 fstat(int fd, struct stat *buf)
 {
-   if (fd == drm_fd) {
+   if (is_drm_fd(fd)) {
buf->st_mode = S_IFCHR |
(S_IRWXG | S_IRGRP |  S_IRWXU | S_IRUSR);
buf->st_uid = 0;
@@ -141,7 +159,7 @@ fstat(int fd, struct stat *buf)
 __attribute__ ((visibility ("default"))) int
 fstat64(int fd, struct stat64 *buf)
 {
-   if (fd == drm_fd) {
+   if (is_drm_fd(fd)) {
buf->st_mode = S_IFCHR |
(S_IRWXG | S_IRGRP |  S_IRWXU | S_IRUSR);
buf->st_uid = 0;
@@ -155,7 +173,7 @@ fstat64(int fd, struct stat64 *buf)
 __attribute__ ((visibility ("default"))) int
 __fxstat(int ver, int fd, struct stat *buf)
 {
-   if (fd == drm_fd) {
+   if (is_drm_fd(fd)) {
buf->st_mode = S_IFCHR |
(S_IRWXG | S_IRGRP |  S_IRWXU | S_IRUSR);
buf->st_rdev = makedev(DRM_MAJOR, 0);
@@ -170,7 +188,7 @@ __fxstat(int ver, int fd, struct stat *buf)
 __attribute__ ((visibility ("default"))) int
 __fxstat64(int ver, int fd, struct stat64 *buf)
 {
-   if (fd == drm_fd) {
+   if (is_drm_fd(fd)) {
buf->st_mode = S_IFCHR |
(S_IRWXG | S_IRGRP |  S_IRWXU | S_IRUSR);
buf->st_rdev = makedev(DRM_MAJOR, 0);
@@ -188,8 +206,8 @@ fcntl(int fd, int cmd, ...)
va_list args;
int param;
 
-   if (fd == drm_fd && cmd == F_DUPFD_CLOEXEC)
-   return drm_fd;
+   if (is_drm_fd(fd) && cmd == F_DUPFD_CLOEXEC)
+   return create_drm_fd(O_RDWR, O_CLOEXEC);
 
va_start(args, cmd);
param = va_arg(args, int);
@@ -202,7 +220,7 @@ __attribute__ ((visibility ("default"))) void *
 mmap(void *addr, size_t len, int 

[Mesa-dev] [shaderdb 0/2] Intel fixes

2019-01-07 Thread Lionel Landwerlin
We need a custom stub to run shaderdb and this was somewhat broken
with newer versions of Mesa.

Cheers,


Lionel Landwerlin (2):
  run: fix uninitialized value
  intel_stub: update stubbing logic for newer mesa

 intel_stub.c | 98 +---
 run.c|  2 +-
 2 files changed, 49 insertions(+), 51 deletions(-)

--
2.20.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [shaderdb 1/2] run: fix uninitialized value

2019-01-07 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 run.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run.c b/run.c
index 1e415c9..f3e65a3 100644
--- a/run.c
+++ b/run.c
@@ -837,7 +837,7 @@ main(int argc, char **argv)
 enum shader_type type;
 unsigned num_shaders;
 bool use_separate_shader_objects;
-struct binding_list *binding;
+struct binding_list *binding = NULL;
 struct shader *shader = get_shaders(, , ,
 text, shader_test[i].filesize,
 , _shaders,
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: fix EGL/X11 build without GLX

2019-01-07 Thread Dylan Baker
Quoting Chia-I Wu (2019-01-07 12:54:19)
> dep_xcb and others were not set under this configuration.
> 
> Signed-off-by: Chia-I Wu 
> ---
>  meson.build | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/meson.build b/meson.build
> index 709f77a9c7..601085e055 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1361,7 +1361,7 @@ if with_platform_x11
>  dep_xfixes = dependency('xfixes')
>  dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1')
>endif
> -  if (with_any_vk or with_glx == 'dri' or
> +  if (with_any_vk or with_glx == 'dri' or with_egl or
> (with_gallium_vdpau or with_gallium_xvmc or with_gallium_va or
>  with_gallium_omx != 'disabled'))
>  dep_xcb = dependency('xcb')
> -- 
> 2.20.1.97.g81188d93c3-goog
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ac/nir_to_llvm: add missing parentheses in get_inst_tessfactor_writemask()

2019-01-07 Thread Timothy Arceri
Cc: Marek Olšák 
---
 src/amd/common/ac_nir_to_llvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 84dbe17457..c9e11142eb 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4086,9 +4086,9 @@ get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
unsigned num_comps = intrin->dest.ssa.num_components;
 
if (location == VARYING_SLOT_TESS_LEVEL_INNER)
-   writemask = ((1 << num_comps + 1) - 1) << first_component;
+   writemask = ((1 << (num_comps + 1)) - 1) << first_component;
else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
-   writemask = (((1 << num_comps + 1) - 1) << first_component) << 
4;
+   writemask = (((1 << (num_comps + 1)) - 1) << first_component) 
<< 4;
 
return writemask;
 }
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir_to_llvm: add missing parentheses in get_inst_tessfactor_writemask()

2019-01-07 Thread Bas Nieuwenhuizen
I think this got fixed by
https://gitlab.freedesktop.org/mesa/mesa/commit/be6cee51c06dc72ac159bd75b4201c61952515bd
already?

On Tue, Jan 8, 2019 at 12:28 AM Timothy Arceri  wrote:
>
> Cc: Marek Olšák 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 84dbe17457..c9e11142eb 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -4086,9 +4086,9 @@ get_inst_tessfactor_writemask(nir_intrinsic_instr 
> *intrin)
> unsigned num_comps = intrin->dest.ssa.num_components;
>
> if (location == VARYING_SLOT_TESS_LEVEL_INNER)
> -   writemask = ((1 << num_comps + 1) - 1) << first_component;
> +   writemask = ((1 << (num_comps + 1)) - 1) << first_component;
> else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
> -   writemask = (((1 << num_comps + 1) - 1) << first_component) 
> << 4;
> +   writemask = (((1 << (num_comps + 1)) - 1) << first_component) 
> << 4;
>
> return writemask;
>  }
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] freedreno/drm: sync uapi again

2019-01-07 Thread Chia-I Wu
"pad" was missing in Mesa's msm_drm.h.  sizeof(drm_msm_gem_info)
remains the same, but now the compiler initializes the field to
zero.

Buffer allocation results in EINVAL without this for me.

Cc: Rob Clark 
Cc: Kristian Høgsberg 
Signed-off-by: Chia-I Wu 
---
 src/freedreno/drm/msm_drm.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/freedreno/drm/msm_drm.h b/src/freedreno/drm/msm_drm.h
index 09f16fd7be..91a16b333c 100644
--- a/src/freedreno/drm/msm_drm.h
+++ b/src/freedreno/drm/msm_drm.h
@@ -122,6 +122,7 @@ struct drm_msm_gem_info {
__u32 info;   /* in - one of MSM_INFO_* */
__u64 value;  /* in or out */
__u32 len;/* in or out */
+   __u32 pad;
 };
 
 #define MSM_PREP_READ0x01
-- 
2.20.1.97.g81188d93c3-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/16] nir: improve convert_yuv_to_rgb when fuse_ffma=true

2019-01-07 Thread Jonathan Marek
There's no updated series yet. This patch will work on its own and the 
issue that was pointed out doesn't affect behavior at all.


On 1/7/19 4:47 PM, Lionel Landwerlin wrote:
I did not, but then saw that someone pointed out an issue with this particular 
patch.

I can do it tomorrow.
Do you have a link to the updated series?

Thanks,

-
Lionel

On 07/01/2019 16:54, Jonathan Marek wrote:

Hi,

Did you get a chance to try this? If not, I might be able to try it 
myself as I have Intel HW.


On 12/19/18 12:34 PM, Lionel Landwerlin wrote:

Hey Jonathan,

I'm kind of curious as to whether we can have a single expression 
that pretty much generates the same final code (through some of the 
algebraic lowering/optimizations).

I'll give it a try on Intel HW, see what it does.

-
Lionel

On 19/12/2018 16:39, Jonathan Marek wrote:
When ffma is available, we can use a different arrangement of 
constants to

get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7
scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 
ffma.


Signed-off-by: Jonathan Marek 
---
  src/compiler/nir/nir_lower_tex.c | 62 
++--

  1 file changed, 43 insertions(+), 19 deletions(-)

diff --git a/src/compiler/nir/nir_lower_tex.c 
b/src/compiler/nir/nir_lower_tex.c

index 6a6b6c41a7..f7c821bb34 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, 
nir_tex_instr *tex,

 nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
 nir_ssa_def *a)
  {
-   nir_const_value m[3] = {
-  { .f32 = { 1.0f,  0.0f, 1.59602678f, 0.0f } },
-  { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
-  { .f32 = { 1.0f,  2.01723214f,  0.0f,    0.0f } }
-   };
-
-   nir_ssa_def *yuv =
-  nir_vec4(b,
-   nir_fmul(b, nir_imm_float(b, 1.16438356f),
-    nir_fadd(b, y, nir_imm_float(b, -16.0f / 
255.0f))),
-   nir_channel(b, nir_fadd(b, u, nir_imm_float(b, 
-128.0f / 255.0f)), 0),
-   nir_channel(b, nir_fadd(b, v, nir_imm_float(b, 
-128.0f / 255.0f)), 0),

-   nir_imm_float(b, 0.0));
-
-   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[0]));
-   nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[1]));
-   nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[2]));

-
-   nir_ssa_def *result = nir_vec4(b, red, green, blue, a);
+   nir_ssa_def *result;
+
+
+   if (b->shader->options->fuse_ffma) {
+  nir_const_value m[4] = {
+ { .f32 = { 1.16438356f, 1.16438356f, 1.16438356f, 0.0f } },
+ { .f32 = { 0.0f,   -0.39176229f, 2.01723214f, 0.0f } },
+ { .f32 = { 1.59602678f,-0.81296764f, 0.0f, 0.0f } },
+  };
+  static const float y_off = -16.0f * 1.16438356f / 255.0f;
+  static const float sc = 128.0f / 255.0f;
+
+  nir_ssa_def *offset =
+ nir_vec4(b,
+  nir_imm_float(b, y_off - sc * 1.59602678f),
+  nir_imm_float(b, y_off + sc * (0.81296764f + 
0.39176229f)),

+  nir_imm_float(b, y_off - sc * 2.01723214f),
+  a);
+
+  result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]),
+   nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]),
+    nir_ffma(b, v, nir_build_imm(b, 4, 
32, m[2]), offset)));

+   } else {
+  nir_const_value m[3] = {
+ { .f32 = { 1.0f,  0.0f, 1.59602678f, 0.0f } },
+ { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
+ { .f32 = { 1.0f,  2.01723214f,  0.0f,    0.0f } }
+  };
+
+  nir_ssa_def *yuv =
+ nir_vec4(b,
+  nir_fmul(b, nir_imm_float(b, 1.16438356f),
+   nir_fadd(b, y, nir_imm_float(b, -16.0f / 
255.0f))),
+  nir_channel(b, nir_fadd(b, u, nir_imm_float(b, 
-128.0f / 255.0f)), 0),
+  nir_channel(b, nir_fadd(b, v, nir_imm_float(b, 
-128.0f / 255.0f)), 0),

+  nir_imm_float(b, 0.0));
+
+  nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[0]));
+  nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 
32, m[1]));
+  nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, 
m[2]));

+
+  result = nir_vec4(b, red, green, blue, a);
+   }
 nir_ssa_def_rewrite_uses(>dest.ssa, nir_src_for_ssa(result));
  }




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] ac/nir_to_llvm: add ac_are_tessfactors_def_in_all_invocs()

2019-01-07 Thread Timothy Arceri

On 8/1/19 6:43 am, Marek Olšák wrote:


On Mon, Dec 17, 2018 at 8:18 PM Timothy Arceri > wrote:


The following patch will use this with the radeonsi NIR backend
but I've added it to ac so we can use it with RADV in future.

This is a NIR implementation of the tgsi function
tgsi_scan_tess_ctrl().
---
  src/amd/common/ac_nir_to_llvm.c | 161 
  src/amd/common/ac_nir_to_llvm.h |   2 +
  2 files changed, 163 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index 4294956de1..055940b75f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4063,3 +4063,164 @@ ac_lower_indirect_derefs(struct nir_shader
*nir, enum chip_class chip_class)

         nir_lower_indirect_derefs(nir, indirect_mask);
  }
+
+static unsigned
+get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
+{
+       if (intrin->intrinsic != nir_intrinsic_store_deref)
+               return 0;
+
+       nir_variable *var =
+ 
  nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0]));

+
+       if (var->data.mode != nir_var_shader_out)
+               return 0;
+
+       unsigned writemask = 0;
+       const int location = var->data.location;
+       unsigned first_component = var->data.location_frac;
+       unsigned num_comps = intrin->dest.ssa.num_components;
+
+       if (location == VARYING_SLOT_TESS_LEVEL_INNER)
+               writemask = ((1 << num_comps + 1) - 1) <<
first_component;


Parentheses are missing in "1 << num_comps + 1".

+       else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
+               writemask = (((1 << num_comps + 1) - 1) <<
first_component) << 4;


Same here.


Good catch. I did test this code when writing it ... maybe these are 
scalars when we see them here. Either way, I'll fix this and send a 
patch shortly.




Marek

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl/linker: specify proper direction in location aliasing error

2019-01-07 Thread Timothy Arceri

Reviewed-by: Timothy Arceri 

On 8/1/19 1:11 am, Andres Gomez wrote:

The check for location aliasing was always assuming output variables
but this validation is also called for input variables.

Fixes: e2abb75b0e4 ("glsl/linker: validate explicit locations for SSO programs")
Cc: Iago Toral Quiroga 
Signed-off-by: Andres Gomez 
---
  src/compiler/glsl/link_varyings.cpp | 15 ++-
  1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 52e493cb599..3969c0120b3 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -481,9 +481,10 @@ check_location_aliasing(struct explicit_location_info 
explicit_locations[][4],
  /* Component aliasing is not alloed */
  if (comp >= component && comp < last_comp) {
 linker_error(prog,
-"%s shader has multiple outputs explicitly "
+"%s shader has multiple %sputs explicitly "
  "assigned to location %d and component %d\n",
  _mesa_shader_stage_to_string(stage),
+var->data.mode == ir_var_shader_in ? "in" : "out",
  location, comp);
 return false;
  } else {
@@ -502,10 +503,12 @@ check_location_aliasing(struct explicit_location_info 
explicit_locations[][4],
  
 if (info->interpolation != interpolation) {

linker_error(prog,
-   "%s shader has multiple outputs at explicit "
+   "%s shader has multiple %sputs at explicit "
 "location %u with different interpolation "
 "settings\n",
-   _mesa_shader_stage_to_string(stage), location);
+   _mesa_shader_stage_to_string(stage),
+   var->data.mode == ir_var_shader_in ?
+   "in" : "out", location);
return false;
 }
  
@@ -513,9 +516,11 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],

 info->sample != sample ||
 info->patch != patch) {
linker_error(prog,
-   "%s shader has multiple outputs at explicit "
+   "%s shader has multiple %sputs at explicit "
 "location %u with different aux storage\n",
-   _mesa_shader_stage_to_string(stage), location);
+   _mesa_shader_stage_to_string(stage),
+   var->data.mode == ir_var_shader_in ?
+   "in" : "out", location);
return false;
 }
  }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] freedreno/drm: sync uapi again

2019-01-07 Thread Kristian Høgsberg
Reviewed-by: Kristian H. Kristensen 

On Mon, Jan 7, 2019 at 4:27 PM Chia-I Wu  wrote:
>
> "pad" was missing in Mesa's msm_drm.h.  sizeof(drm_msm_gem_info)
> remains the same, but now the compiler initializes the field to
> zero.
>
> Buffer allocation results in EINVAL without this for me.
>
> Cc: Rob Clark 
> Cc: Kristian Høgsberg 
> Signed-off-by: Chia-I Wu 
> ---
>  src/freedreno/drm/msm_drm.h | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/freedreno/drm/msm_drm.h b/src/freedreno/drm/msm_drm.h
> index 09f16fd7be..91a16b333c 100644
> --- a/src/freedreno/drm/msm_drm.h
> +++ b/src/freedreno/drm/msm_drm.h
> @@ -122,6 +122,7 @@ struct drm_msm_gem_info {
> __u32 info;   /* in - one of MSM_INFO_* */
> __u64 value;  /* in or out */
> __u32 len;/* in or out */
> +   __u32 pad;
>  };
>
>  #define MSM_PREP_READ0x01
> --
> 2.20.1.97.g81188d93c3-goog
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] etnaviv: implement check_resource_capability

2019-01-07 Thread Lucas Stach
Hi Christian,

Am Freitag, den 28.12.2018, 11:04 +0100 schrieb Christian Gmeiner:
> Am Di., 18. Dez. 2018 um 19:09 Uhr schrieb Lucas Stach 
> :
> > 
> > We try to avoid sharing all resources with KMS side of renderonly, as this
> > adds some overhead that isn't really needed for most resources. If
> > someone tries to validate a resource for scanout, this is a good indication
> > that the sharing with the KMS side is actually needed.
> > 
> > > > Signed-off-by: Lucas Stach 
> > ---
> >  .../drivers/etnaviv/etnaviv_resource.c| 22 +++
> >  1 file changed, 22 insertions(+)
> > 
> > diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.c 
> > b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> > index c00912880309..cd846e3ae36e 100644
> > --- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
> > +++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> > @@ -624,6 +624,27 @@ etna_resource_get_handle(struct pipe_screen *pscreen,
> > }
> >  }
> > 
> > +static bool
> > +etna_check_resource_capability(struct pipe_screen *pscreen,
> > +   struct pipe_resource *prsc,
> > +   unsigned bind)
> > +{
> > +   struct etna_screen *screen = etna_screen(pscreen);
> > +   struct etna_resource *rsc = etna_resource(prsc);
> > +
> > +   if ((bind & PIPE_BIND_LINEAR) && rsc->layout != ETNA_LAYOUT_LINEAR)
> > +  return false;
> > +
> > +   if ((bind & PIPE_BIND_SCANOUT) && !rsc->scanout) {
> > +  rsc->scanout = renderonly_create_gpu_import_for_resource(prsc, 
> > screen->ro,
> > +   NULL);
> 
> Can you explain to me why the
> renderonly_create_gpu_import_for_resource(..) call ends up here? Are
> there really any cases where !rsc->scanout? I looked at the relevant
> code parts and I have the feeling
> the renderonly_create_gpu_import_for_resource(..) call happens much
> earlier in the call chain. I could be
> wrong as I am running out of coffee right now :)

There is one very relevant use-case where we end up with no scanout
resource: dma-buf import on the compositor side with Wayland. On
i.MX6qp we get a client buffer that is perfectly fine for putting
directly on an imx-drm plane.

When we import the buffer through GBM we end up with an etnaviv resource
without a scanout handle. GBM will make sure to validate the imported
buffer for scanout before trying to export the KMS handle though. So we
can construct the scanout handle at that point to allow the compositor
to push the client buffer directly onto the plane, skipping the
composition step in some cases.

Regards,
Lucas

> > +  if (!rsc->scanout)
> > + return false;
> > +   }
> > +
> > +   return true;
> > +}
> > +
> >  void
> >  etna_resource_used(struct etna_context *ctx, struct pipe_resource *prsc,
> > enum etna_resource_status status)
> > @@ -667,4 +688,5 @@ etna_resource_screen_init(struct pipe_screen *pscreen)
> > pscreen->resource_get_handle = etna_resource_get_handle;
> > pscreen->resource_changed = etna_resource_changed;
> > pscreen->resource_destroy = etna_resource_destroy;
> > +   pscreen->check_resource_capability = etna_check_resource_capability;
> >  }
> > --
> > 2.19.1
> > 
> > ___
> > etnaviv mailing list
> > etna...@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/etnaviv
> 
> 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] llvmpipe: Always return some fence in flush (v2)

2019-01-07 Thread Tomasz Figa
On Sun, Dec 23, 2018 at 12:55 AM Roland Scheidegger  wrote:
>
> Alright, I guess it should work...
>
> Reviewed-by: Roland Scheidegger 
>

Thanks!

Would we have anyone who could help to commit it?

(I know that I was supposed to apply for commit rights, but I expect
my contribution rate to be relatively low, due to a shift to different
areas, so I don't think I'm a good candidate for a committer anymore.)

Best regards,
Tomasz

>
> Am 14.12.18 um 09:17 schrieb Tomasz Figa:
> > If there is no last fence, due to no rendering happening yet, just
> > create a new signaled fence and return it, to match the expectations of
> > the EGL sync fence API.
> >
> > Fixes random "Could not create sync fence 0x3003" assertion failures from
> > Skia on Android, coming from the following code:
> >
> > https://android.googlesource.com/platform/frameworks/base/+/master/libs/hwui/pipeline/skia/SkiaOpenGLPipeline.cpp#427
> >
> > Reproducible especially with thread count >= 4.
> >
> > One could make the driver always keep the reference to the last fence,
> > but:
> >
> >  - the driver seems to explicitly destroy the fence whenever a rendering
> >pass completes and changing that would require a significant functional
> >change to the code. (Specifically, in lp_scene_end_rasterization().)
> >
> >  - it still wouldn't solve the problem of an EGL sync fence being created
> >and waited on without any rendering happening at all, which is
> >also likely to happen with Android code pointed to in the commit.
> >
> > Therefore, the simple approach of always creating a fence is taken,
> > similarly to other drivers, such as radeonsi.
> >
> > Tested with piglit llvmpipe suite with no regressions and following
> > tests fixed:
> >
> > egl_khr_fence_sync
> >  conformance
> >   eglclientwaitsynckhr_flag_sync_flush
> >   eglclientwaitsynckhr_nonzero_timeout
> >   eglclientwaitsynckhr_zero_timeout
> >   eglcreatesynckhr_default_attributes
> >   eglgetsyncattribkhr_invalid_attrib
> >   eglgetsyncattribkhr_sync_status
> >
> > v2:
> >  - remove the useless lp_fence_reference() dance (Nicolai),
> >  - explain why creating the dummy fence is the right approach.
> >
> > Signed-off-by: Tomasz Figa 
> > ---
> >  src/gallium/drivers/llvmpipe/lp_setup.c | 2 ++
> >  1 file changed, 2 insertions(+)
> >
> > diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c 
> > b/src/gallium/drivers/llvmpipe/lp_setup.c
> > index b087369473..e72e119c8a 100644
> > --- a/src/gallium/drivers/llvmpipe/lp_setup.c
> > +++ b/src/gallium/drivers/llvmpipe/lp_setup.c
> > @@ -361,6 +361,8 @@ lp_setup_flush( struct lp_setup_context *setup,
> >
> > if (fence) {
> >lp_fence_reference((struct lp_fence **)fence, setup->last_fence);
> > +  if (!*fence)
> > + *fence = (struct pipe_fence_handle *)lp_fence_create(0);
> > }
> >  }
> >
> >
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Chromium - Application-level nouveau blacklist

2019-01-07 Thread Stéphane Marchesin
On Sat, Jan 5, 2019 at 11:37 PM Jason Ekstrand  wrote:
>
> On Sat, Jan 5, 2019 at 2:40 PM Ilia Mirkin  wrote:
>>
>> It looks like as of Chromium 71, nouveau is completely blacklisted.
>
>
> That's rather unfortunate. :-(  The intel mesa drivers were also blacklisted 
> for quite some time a while back.  I'm not really sure what we did to get 
> blacklisted or what we did to get unblacklisted.
>

One major difference is that we have shipped Chromebooks with
intel-based GPUs for ~8 years, so we (collectively, intel and Chrome
OS folks) have fixed the long tail of Chrome bugs for Chrome OS +
Intel, and Linux benefited as a side effect.


>>
>> I don't really see a way back from this, since they don't cite any
>> easily reproducible issues, except that some people had some issues
>> with indeterminate hardware and indeterminate versions of mesa.
>>
>> In the bug that triggered this
>> (https://bugs.chromium.org/p/chromium/issues/detail?id=876523), where
>> I might have slightly lost my cool, they (at the end) suggested that
>> we try to make nouveau a first-class citizen with chromium. However I
>> will never be able to present concrete evidence that inconcrete issues
>> are resolved. I did run the WebGL CTS suite, but that resulted in some
>> hangs from the max-texture-size-equivalent test, and some
>> browser-level weirdness after some tests where later tests all fail
>> (due to what I have to assume is a browser bug). I don't think I
>> managed to properly track down the true reason why. I didn't want to
>> reach out to them with such results, as that's just further evidence
>> of nouveau not working perfectly.
>
>
> If you want concrete bugs to fix, I highly recommend OpenGL[ES] conformance 
> tests, dEQP, and the WebGL CTS (which is mostly a re-hash of the OpenGL ES 
> 3.0 CTS).  Google cares quite a bit about driver conformance and are much 
> more likely to consider nouveau to be high-quality if those test suites are 
> in good shape.  Years of experience dealing with Google says that dEQP 
> results speak much louder than philosophical arguments about who should 
> decide whether or not Chromium should accept the distro GL.  Fortunately for 
> you, the well funded driver teams (Intel and AMD) have already done a lot of 
> the painful work of getting a lot of the bugs and "bugs" out of core mesa and 
> galium.  What's left are likely real back-end driver bugs which may be 
> affecting some user somewhere so they're worth fixing.

The cause of this blacklist is not (lack of) deqp conformance, but
instead mostly automated crash reports. In other words, crashes in the
field where we have a backtrace but not necessarily a good repro case.
For someone building an application like Chrome, the multitude of
kernel+user space drivers+OS version+compositor combinations basically
makes each bug a very, very long investigation. I argued a long time
ago that we should try to get more communication going between Chrome
folks and Linux GPU driver folks to fix this, but quickly realized
that the task at hand is huge. You can only make a dent in it by being
very systematic about it. If someone wants to commit the time to do
that, I would be happy to help communication around these efforts.


>
>>
>> In the meanwhile, end users are losing accelerated WebGL which in
>> practice worked just fine (at least in my usage of it), and probably
>> some other functionality.
>>
>> One idea is to flip GL_VENDOR to some random string if chromium is
>> running. I don't like this idea, but I also don't have any great
>> alternatives. We can also just take this, as yet-another nail in the
>> nouveau coffin.
>
>
> You asked for opinions, so here you go. :-P  In my personal (and rather 
> disinterested) opinion, I would recommend against such measures.  The last 
> thing anyone needs is an arms race between nouveau and Chromium teams.  I 
> think the better short-term thing to do would be to provide some 
> documentation about WebGL and educate users about Chromium's 
> --ignore-gpu-blacklist option.  This documentation could go on the mesa 
> website or, likely more usefully, it could go in various distro wiki entries 
> about nouveau and/or general nvidia issues.  In the long term, what's needed 
> is improving nouveau quality and stability and re-building trust with the 
> Chromium team.  I'm not trying to attack nouveau here but the fact is that 
> trust has been lost due to an unfortunate history of mis-filed (against 
> Chromium) bugs.  That trust doesn't get re-built by nuclear solutions.


Yes I think the Chrome-side is very simple here: because there isn't
time or means for in-depth investigation, if a driver crashes too
much, it gets blacklisted. The situation is not unique, the GPU
blacklist file is 1700 lines:
https://chromium.googlesource.com/chromium/src/gpu/+/master/config/software_rendering_list.json

Anyway, IMO if the biggest crashers can be fixed, I think we could
eventually make a case to reenable.

Stéphane

>
> 

Re: [Mesa-dev] [PATCH 08/10] intel/fs: Remove existing lower_conversions pass.

2019-01-07 Thread Iago Toral
On Mon, 2019-01-07 at 12:02 -0800, Francisco Jerez wrote:
> Iago Toral  writes:
> 
> > On Sat, 2018-12-29 at 12:39 -0800, Francisco Jerez wrote:
> > > It's redundant with the functionality provided by lower_regioning
> > > now.
> > > ---
> > >  src/intel/Makefile.sources|   1 -
> > >  src/intel/compiler/brw_fs.cpp |   1 -
> > >  src/intel/compiler/brw_fs.h   |   1 -
> > >  .../compiler/brw_fs_lower_conversions.cpp | 132 
> > > 
> > > --
> > >  src/intel/compiler/meson.build|   1 -
> > >  5 files changed, 136 deletions(-)
> > >  delete mode 100644
> > > src/intel/compiler/brw_fs_lower_conversions.cpp
> > > 
> > > diff --git a/src/intel/Makefile.sources
> > > b/src/intel/Makefile.sources
> > > index 6b9874d2b80..fe06a57b42e 100644
> > > --- a/src/intel/Makefile.sources
> > > +++ b/src/intel/Makefile.sources
> > > @@ -62,7 +62,6 @@ COMPILER_FILES = \
> > >   compiler/brw_fs.h \
> > >   compiler/brw_fs_live_variables.cpp \
> > >   compiler/brw_fs_live_variables.h \
> > > - compiler/brw_fs_lower_conversions.cpp \
> > >   compiler/brw_fs_lower_pack.cpp \
> > >   compiler/brw_fs_lower_regioning.cpp \
> > >   compiler/brw_fs_nir.cpp \
> > > diff --git a/src/intel/compiler/brw_fs.cpp
> > > b/src/intel/compiler/brw_fs.cpp
> > > index caa7a798332..d6280d558ec 100644
> > > --- a/src/intel/compiler/brw_fs.cpp
> > > +++ b/src/intel/compiler/brw_fs.cpp
> > > @@ -6472,7 +6472,6 @@ fs_visitor::optimize()
> > > }
> > >  
> > > progress = false;
> > > -   OPT(lower_conversions);
> > > OPT(lower_regioning);
> > > if (progress) {
> > >OPT(opt_copy_propagation);
> > 
> > If you didn't do this in the previous patch, then maybe do it here:
> > 
> > if (OPT(lower_regioning)) {
> >...
> > }
> > 
> > and avoid resetting progress.
> > 
> 
> I left this lying around because there is another legalization pass
> coming up that should cause the same post-lowering optimization
> passes
> to be executed if progress is made.  I can clean things up though if
> you
> like, and re-introduce the reset of the progress flag in the future
> commit.

If we are still going to need it soon I think it is fine to leave it
like this.

> > > diff --git a/src/intel/compiler/brw_fs.h
> > > b/src/intel/compiler/brw_fs.h
> > > index 36825754931..7edaa3af43c 100644
> > > --- a/src/intel/compiler/brw_fs.h
> > > +++ b/src/intel/compiler/brw_fs.h
> > > @@ -165,7 +165,6 @@ public:
> > > bool lower_load_payload();
> > > bool lower_pack();
> > > bool lower_regioning();
> > > -   bool lower_conversions();
> > > bool lower_logical_sends();
> > > bool lower_integer_multiplication();
> > > bool lower_minmax();
> > > diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp
> > > b/src/intel/compiler/brw_fs_lower_conversions.cpp
> > > deleted file mode 100644
> > > index 145fb55f995..000
> > > --- a/src/intel/compiler/brw_fs_lower_conversions.cpp
> > > +++ /dev/null
> > > @@ -1,132 +0,0 @@
> > > -/*
> > > - * Copyright © 2015 Connor Abbott
> > > - *
> > > - * Permission is hereby granted, free of charge, to any person
> > > obtaining a
> > > - * copy of this software and associated documentation files (the
> > > "Software"),
> > > - * to deal in the Software without restriction, including
> > > without
> > > limitation
> > > - * the rights to use, copy, modify, merge, publish, distribute,
> > > sublicense,
> > > - * and/or sell copies of the Software, and to permit persons to
> > > whom
> > > the
> > > - * Software is furnished to do so, subject to the following
> > > conditions:
> > > - *
> > > - * The above copyright notice and this permission notice
> > > (including
> > > the next
> > > - * paragraph) shall be included in all copies or substantial
> > > portions of the
> > > - * Software.
> > > - *
> > > - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> > > KIND,
> > > EXPRESS OR
> > > - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > > MERCHANTABILITY,
> > > - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> > > EVENT SHALL
> > > - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > > DAMAGES
> > > OR OTHER
> > > - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> > > OTHERWISE,
> > > ARISING
> > > - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > > OTHER DEALINGS
> > > - * IN THE SOFTWARE.
> > > - */
> > > -
> > > -#include "brw_fs.h"
> > > -#include "brw_cfg.h"
> > > -#include "brw_fs_builder.h"
> > > -
> > > -using namespace brw;
> > > -
> > > -static bool
> > > -supports_type_conversion(const fs_inst *inst) {
> > > -   switch (inst->opcode) {
> > > -   case BRW_OPCODE_MOV:
> > > -   case SHADER_OPCODE_MOV_INDIRECT:
> > > -  return true;
> > > -   case BRW_OPCODE_SEL:
> > > -  return inst->dst.type == get_exec_type(inst);
> > > -   default:
> > > -  /* FIXME: We assume the opcodes don't explicitly mentioned
> > > -   

Re: [Mesa-dev] [PATCH 03/10] intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split.

2019-01-07 Thread Francisco Jerez
Iago Toral  writes:

> On Mon, 2019-01-07 at 11:58 -0800, Francisco Jerez wrote:
>> Iago Toral  writes:
>> 
>> > On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
>> > > This seems to be a problem in combination with the
>> > > lower_regioning
>> > > pass introduced by a future commit, which can modify a SIMD-split
>> > > instruction causing its execution size to become illegal
>> > > again.  A
>> > > subsequent call to lower_simd_width() would hit this bug on a
>> > > future
>> > > platform.
>> > > 
>> > > Cc: mesa-sta...@lists.freedesktop.org
>> > > ---
>> > >  src/intel/compiler/brw_fs.cpp | 4 ++--
>> > >  1 file changed, 2 insertions(+), 2 deletions(-)
>> > > 
>> > > diff --git a/src/intel/compiler/brw_fs.cpp
>> > > b/src/intel/compiler/brw_fs.cpp
>> > > index 97544fdf465..4aacc72a1b7 100644
>> > > --- a/src/intel/compiler/brw_fs.cpp
>> > > +++ b/src/intel/compiler/brw_fs.cpp
>> > > @@ -5666,7 +5666,7 @@ static fs_reg
>> > >  emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
>> > >  {
>> > > /* Specified channel group from the source region. */
>> > > -   const fs_reg src = horiz_offset(inst->src[i], lbld.group());
>> > > +   const fs_reg src = horiz_offset(inst->src[i], lbld.group() -
>> > > inst->group);
>> > 
>> > Should we assert that lbld.group >= inst->group? Same below.
>> > 
>> 
>> The IR will fail validation anytime that's not the case.  But I can
>> add
>> the assertions in both places if that makes you feel more
>> comfortable.
>
> I guess you are referring to this assert at codegen time:
>
> assert(inst->force_writemask_all || inst->group % inst->exec_size ==
> 0);
>
> I guess that is probably enough, but I would still prefer to add the
> asserts here too if that's okay.
>

Nah, I was thinking of the i965 IR validator that checks for
out-of-bounds VGRF register accesses.  But the asserts would be more
strict -- Just added them locally.

Thanks!

>> > > if (needs_src_copy(lbld, inst, i)) {
>> > >/* Builder of the right width to perform the copy avoiding
>> > > uninitialized
>> > > @@ -5757,7 +5757,7 @@ emit_zip(const fs_builder &lbld_before,
>> > > const
>> > > fs_builder &lbld_after,
>> > > assert(lbld_before.group() == lbld_after.group());
>> > >  
>> > > /* Specified channel group from the destination region. */
>> > > -   const fs_reg dst = horiz_offset(inst->dst,
>> > > lbld_after.group());
>> > > +   const fs_reg dst = horiz_offset(inst->dst, lbld_after.group()
>> > > -
>> > > inst->group);
>> > > const unsigned dst_size = inst->size_written /
>> > >inst->dst.component_size(inst->exec_size);
>> > >  


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split.

2019-01-07 Thread Iago Toral
On Mon, 2019-01-07 at 11:58 -0800, Francisco Jerez wrote:
> Iago Toral  writes:
> 
> > On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
> > > This seems to be a problem in combination with the
> > > lower_regioning
> > > pass introduced by a future commit, which can modify a SIMD-split
> > > instruction causing its execution size to become illegal
> > > again.  A
> > > subsequent call to lower_simd_width() would hit this bug on a
> > > future
> > > platform.
> > > 
> > > Cc: mesa-sta...@lists.freedesktop.org
> > > ---
> > >  src/intel/compiler/brw_fs.cpp | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/src/intel/compiler/brw_fs.cpp
> > > b/src/intel/compiler/brw_fs.cpp
> > > index 97544fdf465..4aacc72a1b7 100644
> > > --- a/src/intel/compiler/brw_fs.cpp
> > > +++ b/src/intel/compiler/brw_fs.cpp
> > > @@ -5666,7 +5666,7 @@ static fs_reg
> > >  emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
> > >  {
> > > /* Specified channel group from the source region. */
> > > -   const fs_reg src = horiz_offset(inst->src[i], lbld.group());
> > > +   const fs_reg src = horiz_offset(inst->src[i], lbld.group() -
> > > inst->group);
> > 
> > Should we assert that lbld.group >= inst->group? Same below.
> > 
> 
> The IR will fail validation anytime that's not the case.  But I can
> add
> the assertions in both places if that makes you feel more
> comfortable.

I guess you are referring to this assert at codegen time:

assert(inst->force_writemask_all || inst->group % inst->exec_size ==
0);

I guess that is probably enough, but I would still prefer to add the
asserts here too if that's okay.

> > > if (needs_src_copy(lbld, inst, i)) {
> > >/* Builder of the right width to perform the copy avoiding
> > > uninitialized
> > > @@ -5757,7 +5757,7 @@ emit_zip(const fs_builder &lbld_before,
> > > const
> > > fs_builder &lbld_after,
> > > assert(lbld_before.group() == lbld_after.group());
> > >  
> > > /* Specified channel group from the destination region. */
> > > -   const fs_reg dst = horiz_offset(inst->dst,
> > > lbld_after.group());
> > > +   const fs_reg dst = horiz_offset(inst->dst, lbld_after.group()
> > > -
> > > inst->group);
> > > const unsigned dst_size = inst->size_written /
> > >inst->dst.component_size(inst->exec_size);
> > >  

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Assorted bug fixes and improvements back-ported from an internal branch.

2019-01-07 Thread Iago Toral
With the few changes we agreed on during the review, this series is:

Reviewed-by: Iago Toral Quiroga 

Jason, just in case you didn't follow: this series adds a regioning
lowering pass that will do exactly one of the things you suggested when
we were discussing how to improve handling of hardware restrictions in
the backend: it implements a FS IR lowering pass that addresses them
late after translation and optimization, which should also ensure that
other parts of the backend don't break things after translation, so I
think it is a good solution for at least some of the problems we
discussed.
 
Iago

On Sat, 2018-12-29 at 12:38 -0800, Francisco Jerez wrote:
> These are a number of fixes and clean-ups we've been carrying around
> for a while in an internal branch.  Most of the fixes are required
> for
> conformance of a future platform, but due to their nature some of
> them
> are likely to affect shipping platforms as well -- Especially the
> issues addressed by patches 1 and 5, and certainly the issue
> addressed
> by PATCH 2 which was causing Vulkan CTS failures on ICL.
> 
> PATCH 7 introduces a more automated approach to enforce any regioning
> restrictions of the hardware, which should be more reliable than the
> current approach of enforcing them manually at NIR translation time
> hoping that the optimizer will leave the workarounds untouched.  It
> has some potential to fix bugs in certain scenarios, but it's
> intrusive enough that it's not marked for inclusion in mesa-stable
> yet.
> 
> Patches 8-9 take advantage of the lowering pass in order to get rid
> of
> a bunch of code that is now redundant.  The code removed by PATCH 10
> has been redundant ever since the FS IR gained the ability to
> represent strided sources.
> 
> [PATCH 01/10] intel/fs: Handle source modifiers in
> lower_integer_multiplication().
> [PATCH 02/10] intel/fs: Implement quad swizzles on ICL+.
> [PATCH 03/10] intel/fs: Fix bug in lower_simd_width while splitting
> an instruction which was already split.
> [PATCH 04/10] intel/eu/gen7: Fix brw_MOV() with DF destination and
> strided source.
> [PATCH 05/10] intel/fs: Respect CHV/BXT regioning restrictions in
> copy propagation pass.
> [PATCH 06/10] intel/fs: Constify fs_inst::can_do_source_mods().
> [PATCH 07/10] intel/fs: Introduce regioning lowering pass.
> [PATCH 08/10] intel/fs: Remove existing lower_conversions pass.
> [PATCH 09/10] intel/fs: Remove nasty open-coded CHV/BXT 64-bit
> workarounds.
> [PATCH 10/10] intel/fs: Remove FS_OPCODE_UNPACK_HALF_2x16_SPLIT
> opcodes.
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >