[Mesa-dev] [PATCH] gallivm: Detect VSX separately from Altivec

2018-08-20 Thread Vicki Pfau
Previously gallivm would attempt to use VSX instructions on all systems
where it detected that Altivec is supported; however, VSX was added to
POWER long after Altivec, causing lots of crashes on older POWER/PPC
hardware, e.g. PPC Macs. By detecting VSX separately from Altivec we can
automatically disable it on hardware that supports Altivec but not VSX

Signed-off-by: Vicki Pfau 
---
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 21 +++
 src/gallium/auxiliary/util/u_cpu_detect.c | 14 -
 src/gallium/auxiliary/util/u_cpu_detect.h |  1 +
 3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 79dbedbb56..fcbdd5050f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -650,26 +650,11 @@ 
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
 * which are fixed in LLVM 4.0.
 *
 * With LLVM 4.0 or higher:
-* Make sure VSX instructions are ENABLED, unless
-* a) the entire -mattr option is overridden via GALLIVM_MATTRS, or
-* b) VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 
0.
+* Make sure VSX instructions are ENABLED (if supported), unless
+* VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0.
 */
if (util_cpu_caps.has_altivec) {
-  char *env_mattrs = getenv("GALLIVM_MATTRS");
-  if (env_mattrs) {
- MAttrs.push_back(env_mattrs);
-  }
-  else {
- boolean enable_vsx = true;
- char *env_vsx = getenv("GALLIVM_VSX");
- if (env_vsx && env_vsx[0] == '0') {
-enable_vsx = false;
- }
- if (enable_vsx)
-MAttrs.push_back("+vsx");
- else
-MAttrs.push_back("-vsx");
-  }
+  MAttrs.push_back(util_cpu_caps.has_vsx ? "+vsx" : "-vsx");
}
 #endif
 #endif
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c 
b/src/gallium/auxiliary/util/u_cpu_detect.c
index 3c6ae4ea1a..14003aa769 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -133,6 +133,7 @@ check_os_altivec_support(void)
   signal(SIGILL, SIG_DFL);
} else {
   boolean enable_altivec = TRUE;/* Default: enable  if available, and 
if not overridden */
+  boolean enable_vsx = TRUE;
 #ifdef DEBUG
   /* Disabling Altivec code generation is not the same as disabling VSX 
code generation,
* which can be done simply by passing -mattr=-vsx to the LLVM compiler; 
cf.
@@ -144,6 +145,11 @@ check_os_altivec_support(void)
  enable_altivec = FALSE;
   }
 #endif
+  /* VSX instructions can be explicitly enabled/disabled via GALLIVM_VSX=1 
or 0 */
+  char *env_vsx = getenv("GALLIVM_VSX");
+  if (env_vsx && env_vsx[0] == '0') {
+ enable_vsx = FALSE;
+  }
   if (enable_altivec) {
  __lv_powerpc_canjump = 1;
 
@@ -153,8 +159,13 @@ check_os_altivec_support(void)
  :
  : "r" (-1));
 
- signal(SIGILL, SIG_DFL);
  util_cpu_caps.has_altivec = 1;
+
+ if (enable_vsx) {
+__asm __volatile("xxland %vs0, %vs0, %vs0");
+util_cpu_caps.has_vsx = 1;
+ }
+ signal(SIGILL, SIG_DFL);
   } else {
  util_cpu_caps.has_altivec = 0;
   }
@@ -536,6 +547,7 @@ util_cpu_detect(void)
   debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", 
util_cpu_caps.has_3dnow_ext);
   debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop);
   debug_printf("util_cpu_caps.has_altivec = %u\n", 
util_cpu_caps.has_altivec);
+  debug_printf("util_cpu_caps.has_vsx = %u\n", util_cpu_caps.has_vsx);
   debug_printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon);
   debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
   debug_printf("util_cpu_caps.has_avx512f = %u\n", 
util_cpu_caps.has_avx512f);
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h 
b/src/gallium/auxiliary/util/u_cpu_detect.h
index 7a63d55028..19f5567ca7 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -71,6 +71,7 @@ struct util_cpu_caps {
unsigned has_3dnow_ext:1;
unsigned has_xop:1;
unsigned has_altivec:1;
+   unsigned has_vsx:1;
unsigned has_daz:1;
unsigned has_neon:1;
 
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/lower_ycbcr: Use the binding array size for bounds checks

2018-08-20 Thread Dylan Baker
Quoting Lionel Landwerlin (2018-08-20 08:59:38)
> From: Jason Ekstrand 
> 
> Because lower_ycbcr gets called before apply_pipeline_layout, the
> indices are all logical and the binding layout HW size is actually too
> big for the bounds check.  We should just use the regular logical array
> size instead.
> 
> Fixes: f3e91e78a33 "anv: add nir lowering pass for ycbcr textures"
> Reviewed-by: Timothy Arceri 
> Reviewed-by: Lionel Landwerlin 
> (cherry picked from commit 320dacb0a051cd1736e0976f70467b68281edfbf)
> ---
>  src/intel/vulkan/anv_nir_lower_ycbcr_textures.c | 10 --
>  1 file changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c 
> b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
> index ebf1fd9c267..e2b560364bc 100644
> --- a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
> +++ b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
> @@ -337,18 +337,16 @@ try_lower_tex_ycbcr(struct anv_pipeline_layout *layout,
> if (binding->immutable_samplers == NULL)
>return false;
>  
> -   unsigned texture_index = tex->texture_index;
> +   assert(tex->texture_index == 0);
> +   unsigned array_index = 0;
> if (tex->texture->deref.child) {
>assert(tex->texture->deref.child->deref_type == nir_deref_type_array);
>nir_deref_array *deref_array = 
> nir_deref_as_array(tex->texture->deref.child);
>if (deref_array->deref_array_type != nir_deref_array_type_direct)
>   return false;
> -  size_t hw_binding_size =
> - anv_descriptor_set_binding_layout_get_hw_size(binding);
> -  texture_index += MIN2(deref_array->base_offset, hw_binding_size - 1);
> +  array_index = MIN2(deref_array->base_offset, binding->array_size - 1);
> }
> -   const struct anv_sampler *sampler =
> -  binding->immutable_samplers[texture_index];
> +   const struct anv_sampler *sampler = 
> binding->immutable_samplers[array_index];
>  
> if (sampler->conversion == NULL)
>return false;
> -- 
> 2.18.0
> 

Thanks! Applied to staging/18.1

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107629] [regression][bisected] Build fails with nir_load_sample_id_no_per_sample being undefined

2018-08-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107629

Hi-Angel  changed:

   What|Removed |Added

 CC||robcl...@freedesktop.org

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] travis: make drivers explicit in Meson targets

2018-08-20 Thread Emil Velikov
On 20 August 2018 at 15:47, Juan A. Suarez Romero  wrote:
> On Mon, 2018-08-20 at 12:06 +0200, Juan A. Suarez Romero wrote:
>> On Tue, 2018-08-14 at 11:00 +0100, Emil Velikov wrote:
>> > On 8 August 2018 at 15:36, Juan A. Suarez Romero  
>> > wrote:
>> > > Like in the autotools target, make the list of drivers to be built in
>> > > each of the Meson targets explicit.
>> > >
>> > > This will help to identify missing dependencies and other issues more
>> > > easily.
>> > >
>> > > CC: Emil Velikov 
>> >
>> > Thanks Juan! Sorry for missing this :-(
>> >
>> > Do you know, which meson version got the ability to have the driver
>> > list, without the square brackets?
>>
>>
>> I think from the very beginning
>>
>
> Emil, is this patch R-b for you?
>
AFAICT the patch was committed, a bit before my reply.
Hence the "Sorry for missing this" and lack of R-B.

Regardless - huge thanks for this. FWIW
Reviewed-by: Emil Velikov

HTH
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/eu: print bytes instead of 32 bit hex value

2018-08-20 Thread Matt Turner
Cool. This looks pretty good to me. A few comments inline.

On Wed, Aug 15, 2018 at 2:00 PM Sagar Ghuge  wrote:
>
> INTEL_DEBUG=hex prints 32 bit hex value
> and due to endianness of CPU byte order is
> reversed. In order to disassemble binary
> files, print each byte instead of 32 bit hex
> value.

Let's get your editor configured to line wrap at the correct length
(these lines are too short).

If you use vim, you should be able to automatically line wrap to the
appropriate length by highlighting the lines and then giving the
command 'gq'

> Signed-off-by: Sagar Ghuge 
> ---
>  src/intel/compiler/brw_eu.c | 24 
>  1 file changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c
> index 6ef0a6a577..223e561dff 100644
> --- a/src/intel/compiler/brw_eu.c
> +++ b/src/intel/compiler/brw_eu.c
> @@ -365,9 +365,14 @@ brw_disassemble(const struct gen_device_info *devinfo,
>if (compacted) {
>   brw_compact_inst *compacted = (void *)insn;
>  if (dump_hex) {
> -   fprintf(out, "0x%08x 0x%08x   ",
> -   ((uint32_t *)insn)[1],
> -   ((uint32_t *)insn)[0]);
> +   unsigned char * insn_ptr = ((unsigned char *)[0]);
> +   for (int i = 0 ; i < 8; i = i + 4) {
> +  fprintf(out, "%02x %02x %02x %02x ",
> +  insn_ptr[i],
> +  insn_ptr[i + 1],
> +  insn_ptr[i + 2],
> +  insn_ptr[i + 3]);
> +   }

I like printing the spaces between the bytes. That really shows more
clearly that this is a byte array and not subject to any endianness
issues.

One suggestion: let's print some blank spaces after the compacted
instruction hex so that the disassembled instruction vertically aligns
with uncompacted instructions. Currently we get disassembly that looks
like

01 0b 1d 20 00 7c 02 00 mov(8)  g124<1>Fg2.3<0,1,0>F
01 00 60 00 e8 3a a0 2f 5c 00 00 00 00 00 00 00 mov(8)
g125<1>Fg2.7<0,1,0>F

Also, we don't use tabs in i965. When editing old lines that had tabs,
let's take the opportunity to remove them.

My ~/.vimrc has

autocmd BufNewFile,BufRead /home/mattst88/projects/mesa/* set
expandtab tabstop=8 softtabstop=3 shiftwidth=3
autocmd BufNewFile,BufRead
/home/mattst88/projects/mesa/src/glsl/glcpp/* set noexpandtab
tabstop=8 softtabstop=8 shiftwidth=8
autocmd BufNewFile,BufRead
/home/mattst88/projects/mesa/src/glsl/glsl_parser.yy set noexpandtab
tabstop=8 shiftwidth=8
autocmd BufNewFile,BufRead /home/mattst88/projects/piglit/* set
noexpandtab tabstop=8 softtabstop=8 shiftwidth=8

to configure it appropriately for my Mesa and piglit directories.

With those couple of small nits fixed, this will earn my review.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/13] glsl: Add built-in functions for NV_shader_atomic_float

2018-08-20 Thread Caio Marcelo de Oliveira Filho
Hi,

> @@ -1133,6 +1159,9 @@ builtin_builder::create_intrinsics()
>  _atomic_intrinsic2(buffer_atomics_supported,
> glsl_type::int_type,
> ir_intrinsic_generic_atomic_add),
> +_atomic_intrinsic2(NV_shader_atomic_float_supported,
> +   glsl_type::float_type,
> +   ir_intrinsic_generic_atomic_add),
>  
> _atomic_counter_intrinsic1(shader_atomic_counter_ops_or_v460_desktop,
> ir_intrinsic_atomic_counter_add),
>  NULL);

(...)

> @@ -3185,6 +3220,9 @@ builtin_builder::create_builtins()
>  _atomic_op2("__intrinsic_atomic_exchange",
>  buffer_atomics_supported,
>  glsl_type::int_type),
> +_atomic_op2("__intrinsic_atomic_exchange",
> +shader_atomic_float_exchange,
> +glsl_type::float_type),
>  NULL);
> add_function("atomicCompSwap",
>  _atomic_op3("__intrinsic_atomic_comp_swap",

Question: why do some builtins care about "supported" while others care
about the extension being "enabled"?



Thanks,
Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Detect VSX separately from Altivec

2018-08-20 Thread Roland Scheidegger
u_cpu_detect should detect what's really available, not what is used
(though indeed we actually disable u_cpu bits explicitly in gallivm for
some sse features, but this is a hack).
So I think it would be better if u_cpu_detect sets the has_vsx bit
regardless of what the env var is, and gallivm then enables VSX based on
this bit and the env var.
Otherwise looks good to me.

Roland

Am 19.08.2018 um 23:17 schrieb Vicki Pfau:
> Previously gallivm would attempt to use VSX instructions on all systems
> where it detected that Altivec is supported; however, VSX was added to
> POWER long after Altivec, causing lots of crashes on older POWER/PPC
> hardware, e.g. PPC Macs. By detecting VSX separately from Altivec we can
> automatically disable it on hardware that supports Altivec but not VSX
> 
> Signed-off-by: Vicki Pfau 
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 21 +++
>  src/gallium/auxiliary/util/u_cpu_detect.c | 14 -
>  src/gallium/auxiliary/util/u_cpu_detect.h |  1 +
>  3 files changed, 17 insertions(+), 19 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> index 79dbedbb56..fcbdd5050f 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> @@ -650,26 +650,11 @@ 
> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
>  * which are fixed in LLVM 4.0.
>  *
>  * With LLVM 4.0 or higher:
> -* Make sure VSX instructions are ENABLED, unless
> -* a) the entire -mattr option is overridden via GALLIVM_MATTRS, or
> -* b) VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 
> or 0.
> +* Make sure VSX instructions are ENABLED (if supported), unless
> +* VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 
> 0.
>  */
> if (util_cpu_caps.has_altivec) {
> -  char *env_mattrs = getenv("GALLIVM_MATTRS");
> -  if (env_mattrs) {
> - MAttrs.push_back(env_mattrs);
> -  }
> -  else {
> - boolean enable_vsx = true;
> - char *env_vsx = getenv("GALLIVM_VSX");
> - if (env_vsx && env_vsx[0] == '0') {
> -enable_vsx = false;
> - }
> - if (enable_vsx)
> -MAttrs.push_back("+vsx");
> - else
> -MAttrs.push_back("-vsx");
> -  }
> +  MAttrs.push_back(util_cpu_caps.has_vsx ? "+vsx" : "-vsx");
> }
>  #endif
>  #endif
> diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c 
> b/src/gallium/auxiliary/util/u_cpu_detect.c
> index 3c6ae4ea1a..14003aa769 100644
> --- a/src/gallium/auxiliary/util/u_cpu_detect.c
> +++ b/src/gallium/auxiliary/util/u_cpu_detect.c
> @@ -133,6 +133,7 @@ check_os_altivec_support(void)
>signal(SIGILL, SIG_DFL);
> } else {
>boolean enable_altivec = TRUE;/* Default: enable  if available, 
> and if not overridden */
> +  boolean enable_vsx = TRUE;
>  #ifdef DEBUG
>/* Disabling Altivec code generation is not the same as disabling VSX 
> code generation,
> * which can be done simply by passing -mattr=-vsx to the LLVM 
> compiler; cf.
> @@ -144,6 +145,11 @@ check_os_altivec_support(void)
>   enable_altivec = FALSE;
>}
>  #endif
> +  /* VSX instructions can be explicitly enabled/disabled via 
> GALLIVM_VSX=1 or 0 */
> +  char *env_vsx = getenv("GALLIVM_VSX");
> +  if (env_vsx && env_vsx[0] == '0') {
> + enable_vsx = FALSE;
> +  }
>if (enable_altivec) {
>   __lv_powerpc_canjump = 1;
>  
> @@ -153,8 +159,13 @@ check_os_altivec_support(void)
>   :
>   : "r" (-1));
>  
> - signal(SIGILL, SIG_DFL);
>   util_cpu_caps.has_altivec = 1;
> +
> + if (enable_vsx) {
> +__asm __volatile("xxland %vs0, %vs0, %vs0");
> +util_cpu_caps.has_vsx = 1;
> + }
> + signal(SIGILL, SIG_DFL);
>} else {
>   util_cpu_caps.has_altivec = 0;
>}
> @@ -536,6 +547,7 @@ util_cpu_detect(void)
>debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", 
> util_cpu_caps.has_3dnow_ext);
>debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop);
>debug_printf("util_cpu_caps.has_altivec = %u\n", 
> util_cpu_caps.has_altivec);
> +  debug_printf("util_cpu_caps.has_vsx = %u\n", util_cpu_caps.has_vsx);
>debug_printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon);
>debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
>debug_printf("util_cpu_caps.has_avx512f = %u\n", 
> util_cpu_caps.has_avx512f);
> diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h 
> b/src/gallium/auxiliary/util/u_cpu_detect.h
> index 7a63d55028..19f5567ca7 100644
> --- a/src/gallium/auxiliary/util/u_cpu_detect.h
> +++ b/src/gallium/auxiliary/util/u_cpu_detect.h
> @@ -71,6 +71,7 @@ struct util_cpu_caps {
> unsigned has_3dnow_ext:1;
>  

Re: [Mesa-dev] [PATCH] intel/tools: new i965_disasm tool

2018-08-20 Thread Matt Turner
On Thu, Aug 16, 2018 at 1:51 PM Sagar Ghuge  wrote:
>
> Adds a new i965 instruction disassemble tool

This looks very good. A few comments about the structure inline.

> Signed-off-by: Sagar Ghuge 
> ---
>  src/intel/Makefile.tools.am   |  15 +++
>  src/intel/tools/i965_disasm.c | 202 ++
>  src/intel/tools/i965_disasm.h |  46 
>  src/intel/tools/meson.build   |  11 ++
>  4 files changed, 274 insertions(+)
>  create mode 100644 src/intel/tools/i965_disasm.c
>  create mode 100644 src/intel/tools/i965_disasm.h
>
> diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
> index 00624084e6..36a3a70a28 100644
> --- a/src/intel/Makefile.tools.am
> +++ b/src/intel/Makefile.tools.am
> @@ -22,6 +22,7 @@
>  noinst_PROGRAMS += \
> tools/aubinator \
> tools/aubinator_error_decode \
> +   tools/i965_disasm \
> tools/error2aub
>
>
> @@ -62,6 +63,20 @@ tools_aubinator_error_decode_CFLAGS = \
> $(AM_CFLAGS) \
> $(ZLIB_CFLAGS)
>
> +tools_i965_disasm_SOURCES = \
> +   tools/i965_disasm.c \
> +   tools/i965_disasm.h
> +
> +tools_i965_disasm_LDADD = \
> +   common/libintel_common.la \
> +   compiler/libintel_compiler.la \
> +   dev/libintel_dev.la \
> +   $(top_builddir)/src/util/libmesautil.la \
> +   $(PTHREAD_LIBS)
> +
> +tools_i965_disasm_CFLAGS = \
> +   $(AM_CFLAGS)
> +

Looks good.

>  tools_error2aub_SOURCES = \
> tools/gen_context.h \
> diff --git a/src/intel/tools/i965_disasm.c b/src/intel/tools/i965_disasm.c
> new file mode 100644
> index 00..c880559827
> --- /dev/null
> +++ b/src/intel/tools/i965_disasm.c
> @@ -0,0 +1,202 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include 
> +#include 
> +
> +#include "compiler/brw_inst.h"
> +#include "compiler/brw_eu.h"
> +
> +#include "i965_disasm.h"
> +
> +uint64_t INTEL_DEBUG;
> +uint16_t pci_id = 0;
> +FILE *outfile;
> +
> +struct i965_disasm {
> +struct gen_device_info devinfo;
> +};
> +
> +/* Return size of file in bytes pointed by fp */
> +static size_t
> +i965_disasm_get_file_size(FILE *fp)
> +{
> +   size_t size = 0;

No need for initialization.

> +
> +   fseek(fp, 0L, SEEK_END);
> +   size = ftell(fp);
> +   fseek(fp, 0L, SEEK_SET);
> +
> +   return size;
> +}
> +
> +/* Return number of bytes read */
> +static size_t
> +i965_disasm_read_binary(FILE *fp, void **assembly)
> +{
> +   size_t end = i965_disasm_get_file_size(fp);
> +   *assembly = malloc(end + 1);
> +   fread(*assembly, end, 1, fp);
> +   fclose(fp);
> +
> +   return end;
> +}
> +
> +static void
> +print_help(const char *progname, FILE *file)
> +{
> +   fprintf(file,
> +   "Usage: %s [OPTION]...\n"
> +   "Disassemble i965 instructions from binary file.\n\n"
> +   "  --help display this help and exit\n"
> +   "  --binary-path=PATH read binary file from binary file 
> PATH\n"
> +   "  --gen=platform disassemble instructions for given \n"
> +   " platform (3 letter platform name)\n",
> +   progname);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +   FILE *fp = NULL;
> +   void *assembly = NULL;
> +   char *binary_path = NULL;
> +   size_t start = 0, end = 0;
> +   int c, i;
> +   struct i965_disasm *disasm;
> +
> +   bool help = false;
> +   const struct option i965_disasm_opts[] = {
> +  { "help",  no_argument,   (int *) ,  true },
> +  { "binary-path",   required_argument, NULL,   'b' },
> +  { "gen",   required_argument, NULL,   'g'},
> +  { NULL,0, NULL,   0 }

I think we're missing a single space before the last 0 here for
vertical alignment.

> +   };
> +
> +   outfile = 

Re: [Mesa-dev] [PATCH 03/13] docs: Initial version of INTEL_shader_atomic_float_minmax spec

2018-08-20 Thread Ian Romanick
On 08/16/2018 06:02 PM, Caio Marcelo de Oliveira Filho wrote:
> Hello,
> 
>> +(add a new row after the existing "atomicMax" table row, p. 179)
>> +
>> +float atomicMax(inout float mem, float data)
>> +
>> +Computes a new value by taking the maximum of the value of data and
>> +the contents of mem.  If one of these is an IEEE signaling NaN (i.e.,
> 
> The two lines above have different indentations, maybe the
> "Computes..." line should be starting with a TAB.

I'll check the indentation.  It should all be spaces, but there may be
issues.

>> +Interactions with OpenGL 4.6 and ARB_gl_spirv
>> +
>> +If OpenGL 4.6 or ARB_gl_spirv is supported, then
>> +SPV_INTEL_shader_atomic_float_minmax must also be supported.
> 
> Couldn't find the SPIR-V extension, but I guess it is a work in
> progress.  Is there a Vulkan extension too?

Both are in-progress, yeah.

>> +* atomicMin and atomicMax implement the IEEE specification with respect 
>> to
>> +  NaN.  IEEE considers two different kinds of NaN: signaling NaN and 
>> quiet
>> +  NaN.  A quiet NaN has the most significant bit of the mantissa set, 
>> and
>> +  a signaling NaN does not.  This concept does not exist in SPIR-V,
>> +  Vulkan, or OpenGL.  Let qNaN denote a quiet NaN and sNaN denote a
>> +  signaling NaN.  atomicMin and atomicMax specifically implement
>> +
>> +  - fmin(qNaN, x) = fmin(x, qNaN) = fmax(qNaN, x) = fmax(x, qNaN) = x
>> +  - fmin(sNaN, x) = fmin(x, sNaN) = fmax(sNaN, x) = fmax(x, sNaN) = sNaN
>> +  - fmin(sNaN, qNaN) = fmin(qNaN, sNaN) = fmax(sNaN, qNaN) =
>> +fmax(qNaN, sNaN) = sNaN
> 
> For the two items above, is it implicit that the result will be
> quietized?  This matters when using such result with other operation
> with a real value, e.g. is "fmin(fmin(x, sNaN), y)" equals to "y" or
> to "sNaN"?
> 
> The PRM for Skylake says
> 
> "Max(x, sNaN) = Max(sNaN, x) = qNaN (quietized value corresponding
> to the input sNaN) and signal the Invalid Operation exception."

That is interesting... the Bspec says something similar, but it has an
extra note that sNaN is not quieted on SKL+.  It /seems/ like this was
intentional and won't be changed.  My tests on SKL seem to confirm that,
with respect to fmin and fmax, once you have sNaN, you will always have
sNaN.

> Thanks,
> Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107629] [regression][bisected] Build fails with nir_load_sample_id_no_per_sample being undefined

2018-08-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107629

--- Comment #1 from Rob Clark  ---
this seems like you are somehow ending up with old versions of some of the
generated headers?

At one point, nir_intrinsics.h was not autogenerated.. maybe you have an old
copy of it lying around?

Do you get the same issue with a freshly cloned git tree?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] anv/lower_ycbcr: Use the binding array size for bounds checks

2018-08-20 Thread Dylan Baker
Quoting Jason Ekstrand (2018-08-08 01:12:49)
> Because lower_ycbcr gets called before apply_pipeline_layout, the
> indices are all logical and the binding layout HW size is actually too
> big for the bounds check.  We should just use the regular logical array
> size instead.
> 
> Fixes: f3e91e78a33 "anv: add nir lowering pass for ycbcr textures"
> ---
>  src/intel/vulkan/anv_nir_lower_ycbcr_textures.c | 10 --
>  1 file changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c 
> b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
> index 5a971d9be39..71e511f34b7 100644
> --- a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
> +++ b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
> @@ -340,18 +340,16 @@ try_lower_tex_ycbcr(struct anv_pipeline_layout *layout,
> if (binding->immutable_samplers == NULL)
>return false;
>  
> -   unsigned texture_index = tex->texture_index;
> +   assert(tex->texture_index == 0);
> +   unsigned array_index = 0;
> if (deref->deref_type != nir_deref_type_var) {
>assert(deref->deref_type == nir_deref_type_array);
>nir_const_value *const_index = 
> nir_src_as_const_value(deref->arr.index);
>if (!const_index)
>   return false;
> -  size_t hw_binding_size =
> - anv_descriptor_set_binding_layout_get_hw_size(binding);
> -  texture_index += MIN2(const_index->u32[0], hw_binding_size - 1);
> +  array_index = MIN2(const_index->u32[0], binding->array_size - 1);
> }
> -   const struct anv_sampler *sampler =
> -  binding->immutable_samplers[texture_index];
> +   const struct anv_sampler *sampler = 
> binding->immutable_samplers[array_index];
>  
> if (sampler->conversion == NULL)
>return false;
> -- 
> 2.17.1
> 

Hi Jason,

f3e91e78a33 is present in 18.1, but this patch doesn't apply cleanly due to (I
think) your rework of how derefs work in NIR. Do you want to backport this patch
to 18.1, or just drop it?

Thanks,
Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] autotools: include git_sha1.h in dist tarball

2018-08-20 Thread Dylan Baker
Reviewed-by: Dylan Baker 

Quoting Juan A. Suarez Romero (2018-08-20 07:22:35)
> This fixes `make distcheck`.
> 
> Fixes: 471f708ed6 ("git_sha1: simplify logic")
> CC: Eric Engestrom 
> ---
>  src/Makefile.am | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 396865cbe55..412510f435b 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -26,7 +26,7 @@ git_sha1.h:
>  
>  BUILT_SOURCES = git_sha1.h
>  CLEANFILES = $(BUILT_SOURCES)
> -EXTRA_DIST = git_sha1.h.in meson.build
> +EXTRA_DIST = git_sha1.h meson.build
>  
>  SUBDIRS = . gtest util mapi/glapi/gen mapi
>  
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] intel/decoder: fix the possible out of bounds group_iter

2018-08-20 Thread asimiklit . work
From: Andrii Simiklit 

The "gen_group_get_length" function can return a negative value
and it can lead to the out of bounds group_iter.

v2: printing of "unknown command type" was added
v3: just the asserts are added

Signed-off-by: Andrii Simiklit 
---
 src/intel/common/gen_decoder.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index ec0a486..2d9609a 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -803,8 +803,10 @@ static bool
 iter_more_groups(const struct gen_field_iterator *iter)
 {
if (iter->group->variable) {
+  int length = gen_group_get_length(iter->group, iter->p);
+  assert(length >= 0 && "error the length is unknown!");
   return iter_group_offset_bits(iter, iter->group_iter + 1) <
-  (gen_group_get_length(iter->group, iter->p) * 32);
+  (length * 32);
} else {
   return (iter->group_iter + 1) < iter->group->group_count ||
  iter->group->next != NULL;
@@ -991,6 +993,7 @@ gen_field_iterator_init(struct gen_field_iterator *iter,
iter->p_bit = p_bit;
 
int length = gen_group_get_length(iter->group, iter->p);
+   assert(length >= 0 && "error the length is unknown!");
iter->p_end = length > 0 ? [length] : NULL;
iter->print_colors = print_colors;
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] appveyor: Set git core.autocrlf setting to true.

2018-08-20 Thread Roland Scheidegger
Looks good to me.
Reviewed-by: Roland Scheidegger 

Am 20.08.2018 um 13:21 schrieb Jose Fonseca:
> The git core.autocrlf setting defaults to true (ie, all text files get
> checked out as CRLF on Windows), except on Appveyor where it's set to
> "input" (ie, all text files get checked out with the upstream
> repository's line endings, which for us typically means LF.)
> 
> And this was masking on Appveyor a regression in gen_xmlpool.py
> processing t_options.h with CRLF line endings.
> 
> This change sets core.autocrlf to true, which would have caught the
> issue immediately, as seen in
> https://ci.appveyor.com/project/jrfonseca/mesa/build/51
> ---
>  appveyor.yml | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/appveyor.yml b/appveyor.yml
> index 86440f0d76a..73be3c57df8 100644
> --- a/appveyor.yml
> +++ b/appveyor.yml
> @@ -39,11 +39,18 @@ cache:
>  
>  os: Visual Studio 2015
>  
> +init:
> +# Appveyor defaults core.autocrlf to input instead of the default (true), but
> +# that can hide problems processing CRLF text on Windows
> +- git config --global core.autocrlf true
> +
>  environment:
>WINFLEXBISON_ARCHIVE: win_flex_bison-2.5.9.zip
>LLVM_ARCHIVE: llvm-5.0.1-msvc2015-mtd.7z
>  
>  install:
> +# Check git config
> +- git config core.autocrlf
>  # Check pip
>  - python --version
>  - python -m pip --version
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4] anv: add VK_EXT_sampler_filter_minmax support

2018-08-20 Thread Yunchao He
This extension can be supported on SKL+. With this patch,
all corresponding tests (6K+) in CTS can pass. No test fails.

I verified CTS with the command below:
deqp-vk --deqp-case=dEQP-VK.pipeline.sampler.view_type.*reduce*

v2: 1) support all depth formats, not depth-only formats, 2) fix
a wrong indentation (Jason).

v3: fix a few nits (Lionel).

v4: fix failures in CI: disable sampler reduction when sampler
reduction mode is not specified via this extension (Lionel).
---
 src/intel/vulkan/anv_device.c  |  8 
 src/intel/vulkan/anv_extensions.py |  1 +
 src/intel/vulkan/anv_formats.c |  6 ++
 src/intel/vulkan/genX_state.c  | 28 
 4 files changed, 43 insertions(+)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 04fd6a829e..e45ba4b3af 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1116,6 +1116,14 @@ void anv_GetPhysicalDeviceProperties2(
  break;
   }
 
+  case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
+ VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
+(VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
+ properties->filterMinmaxImageComponentMapping = pdevice->info.gen >= 
9;
+ properties->filterMinmaxSingleComponentFormats = true;
+ break;
+  }
+
   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
  VkPhysicalDeviceSubgroupProperties *properties = (void *)ext;
 
diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index ea837744b4..e165bd371d 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -125,6 +125,7 @@ EXTENSIONS = [
 Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen 
>= 9'),
 Extension('VK_EXT_vertex_attribute_divisor',  2, True),
 Extension('VK_EXT_post_depth_coverage',   1, 'device->info.gen 
>= 9'),
+Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen 
>= 9'),
 ]
 
 class VkVersion:
diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
index 815b320a82..33faf7cc37 100644
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -489,6 +489,9 @@ get_image_format_features(const struct gen_device_info 
*devinfo,
   if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT || devinfo->gen >= 8)
  flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
 
+  if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && devinfo->gen >= 9)
+ flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT;
+
   flags |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT |
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
@@ -521,6 +524,9 @@ get_image_format_features(const struct gen_device_info 
*devinfo,
if (isl_format_supports_sampling(devinfo, plane_format.isl_format)) {
   flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
 
+  if (devinfo->gen >= 9)
+ flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT;
+
   if (isl_format_supports_filtering(devinfo, plane_format.isl_format))
  flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
}
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index b1014d9e79..2627851ed9 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -245,6 +245,14 @@ static const uint32_t vk_to_gen_shadow_compare_op[] = {
[VK_COMPARE_OP_ALWAYS]   = PREFILTEROPNEVER,
 };
 
+#if GEN_GEN >= 9
+static const uint32_t vk_to_gen_sampler_reduction_mode[] = {
+   [VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT] = STD_FILTER,
+   [VK_SAMPLER_REDUCTION_MODE_MIN_EXT]  = MINIMUM,
+   [VK_SAMPLER_REDUCTION_MODE_MAX_EXT]  = MAXIMUM,
+};
+#endif
+
 VkResult genX(CreateSampler)(
 VkDevice_device,
 const VkSamplerCreateInfo*  pCreateInfo,
@@ -266,6 +274,11 @@ VkResult genX(CreateSampler)(
uint32_t border_color_offset = device->border_colors.offset +
   pCreateInfo->borderColor * 64;
 
+#if GEN_GEN >= 9
+   unsigned sampler_reduction_mode = STD_FILTER;
+   bool enable_sampler_reduction = false;
+#endif
+
vk_foreach_struct(ext, pCreateInfo->pNext) {
   switch (ext->sType) {
   case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO: {
@@ -281,6 +294,16 @@ VkResult genX(CreateSampler)(
  sampler->conversion = conversion;
  break;
   }
+#if GEN_GEN >= 9
+  case VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT: {
+ struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
+(struct VkSamplerReductionModeCreateInfoEXT *) ext;
+ sampler_reduction_mode =
+

Re: [Mesa-dev] [PATCH v3] intel/decoder: fix the possible out of bounds group_iter

2018-08-20 Thread Lionel Landwerlin

On 20/08/2018 17:20, asimiklit.w...@gmail.com wrote:

From: Andrii Simiklit 

The "gen_group_get_length" function can return a negative value,
which can lead to an out-of-bounds group_iter.

v2: printing of "unknown command type" was added
v3: just the asserts are added

Signed-off-by: Andrii Simiklit 


Reviewed-by: Lionel Landwerlin 

Somebody should take a look at the other patches I sent out ;)
Thanks!

-
Lionel

---
  src/intel/common/gen_decoder.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index ec0a486..2d9609a 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -803,8 +803,10 @@ static bool
  iter_more_groups(const struct gen_field_iterator *iter)
  {
 if (iter->group->variable) {
+  int length = gen_group_get_length(iter->group, iter->p);
+  assert(length >= 0 && "error the length is unknown!");
return iter_group_offset_bits(iter, iter->group_iter + 1) <
-  (gen_group_get_length(iter->group, iter->p) * 32);
+  (length * 32);
 } else {
return (iter->group_iter + 1) < iter->group->group_count ||
   iter->group->next != NULL;
@@ -991,6 +993,7 @@ gen_field_iterator_init(struct gen_field_iterator *iter,
 iter->p_bit = p_bit;
  
 int length = gen_group_get_length(iter->group, iter->p);

+   assert(length >= 0 && "error the length is unknown!");
 iter->p_end = length > 0 ? &p[length] : NULL;
 iter->print_colors = print_colors;
  }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/13] nir: Add floating point atomic min, max, and compare-swap intrinsics

2018-08-20 Thread Ian Romanick
On 08/17/2018 05:06 PM, Caio Marcelo de Oliveira Filho wrote:
> On Fri, Jun 22, 2018 at 10:03:54PM -0700, Ian Romanick wrote:
>> From: Ian Romanick 
>>
>> Signed-off-by: Ian Romanick 
>> ---
>>  src/compiler/glsl/glsl_to_nir.cpp| 32 
>> ++--
>>  src/compiler/nir/nir_intrinsics.py   | 11 +-
>>  src/compiler/nir/nir_lower_atomics_to_ssbo.c |  6 +-
>>  src/compiler/nir/nir_lower_io.c  |  9 
>>  4 files changed, 50 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
>> b/src/compiler/glsl/glsl_to_nir.cpp
>> index 90e960592a0..4d6f826f72f 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -735,6 +735,8 @@ nir_visitor::visit(ir_call *ir)
>>  op = nir_intrinsic_ssbo_atomic_imin;
>>   else if (ir->return_deref->type == glsl_type::uint_type)
>>  op = nir_intrinsic_ssbo_atomic_umin;
>> + else if (ir->return_deref->type == glsl_type::float_type)
>> +op = nir_intrinsic_ssbo_atomic_fmin;
>>   else
>>  unreachable("Invalid type");
>>   break;
>> @@ -744,6 +746,8 @@ nir_visitor::visit(ir_call *ir)
>>  op = nir_intrinsic_ssbo_atomic_imax;
>>   else if (ir->return_deref->type == glsl_type::uint_type)
>>  op = nir_intrinsic_ssbo_atomic_umax;
>> + else if (ir->return_deref->type == glsl_type::float_type)
>> +op = nir_intrinsic_ssbo_atomic_fmax;
>>   else
>>  unreachable("Invalid type");
>>   break;
>> @@ -751,7 +755,9 @@ nir_visitor::visit(ir_call *ir)
>>   op = nir_intrinsic_ssbo_atomic_exchange;
>>   break;
>>case ir_intrinsic_ssbo_atomic_comp_swap:
>> - op = nir_intrinsic_ssbo_atomic_comp_swap;
>> + op = ir->return_deref->type->is_integer_32_64()
>> +? nir_intrinsic_ssbo_atomic_comp_swap
>> +: nir_intrinsic_ssbo_atomic_fcomp_swap;
> 
> Why not compare to glsl_type::float_type?

No particular reason. :)  I guess if we ever get 64-bit float atomics,
we'd need a similar condition.  *shrug*

>>   break;
>>case ir_intrinsic_shader_clock:
>>   op = nir_intrinsic_shader_clock;
>> @@ -803,6 +809,8 @@ nir_visitor::visit(ir_call *ir)
>>  op = nir_intrinsic_shared_atomic_imin;
>>   else if (ir->return_deref->type == glsl_type::uint_type)
>>  op = nir_intrinsic_shared_atomic_umin;
>> + else if (ir->return_deref->type == glsl_type::float_type)
>> +op = nir_intrinsic_shared_atomic_fmin;
>>   else
>>  unreachable("Invalid type");
>>   break;
>> @@ -812,6 +820,8 @@ nir_visitor::visit(ir_call *ir)
>>  op = nir_intrinsic_shared_atomic_imax;
>>   else if (ir->return_deref->type == glsl_type::uint_type)
>>  op = nir_intrinsic_shared_atomic_umax;
>> + else if (ir->return_deref->type == glsl_type::float_type)
>> +op = nir_intrinsic_shared_atomic_fmax;
>>   else
>>  unreachable("Invalid type");
>>   break;
>> @@ -819,7 +829,9 @@ nir_visitor::visit(ir_call *ir)
>>   op = nir_intrinsic_shared_atomic_exchange;
>>   break;
>>case ir_intrinsic_shared_atomic_comp_swap:
>> - op = nir_intrinsic_shared_atomic_comp_swap;
>> + op = ir->return_deref->type->is_integer_32_64()
>> +? nir_intrinsic_shared_atomic_comp_swap
>> +: nir_intrinsic_shared_atomic_fcomp_swap;
>>   break;
>>case ir_intrinsic_vote_any:
>>   op = nir_intrinsic_vote_any;
>> @@ -1068,7 +1080,10 @@ nir_visitor::visit(ir_call *ir)
>>case nir_intrinsic_ssbo_atomic_xor:
>>case nir_intrinsic_ssbo_atomic_exchange:
>>case nir_intrinsic_ssbo_atomic_comp_swap:
>> -  case nir_intrinsic_ssbo_atomic_fadd: {
>> +  case nir_intrinsic_ssbo_atomic_fadd:
>> +  case nir_intrinsic_ssbo_atomic_fmin:
>> +  case nir_intrinsic_ssbo_atomic_fmax:
>> +  case nir_intrinsic_ssbo_atomic_fcomp_swap: {
>>   int param_count = ir->actual_parameters.length();
>>   assert(param_count == 3 || param_count == 4);
>>  
>> @@ -1089,7 +1104,8 @@ nir_visitor::visit(ir_call *ir)
>>  
>>   /* data2 parameter (only with atomic_comp_swap) */
>>   if (param_count == 4) {
>> -assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
>> +assert(op == nir_intrinsic_ssbo_atomic_comp_swap ||
>> +   op == nir_intrinsic_ssbo_atomic_fcomp_swap);
>>  param = param->get_next();
>>  inst = (ir_instruction *) param;
>>  instr->src[3] = 
>> nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
>> @@ -1152,7 +1168,10 @@ nir_visitor::visit(ir_call *ir)
>>case nir_intrinsic_shared_atomic_xor:
>>case nir_intrinsic_shared_atomic_exchange:
>>case 

[Mesa-dev] [Bug 107629] [regression][bisected] Build fails with nir_load_sample_id_no_per_sample being undefined

2018-08-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107629

Bug ID: 107629
   Summary: [regression][bisected] Build fails with
nir_load_sample_id_no_per_sample being undefined
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Other
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: hi-an...@yandex.ru
QA Contact: mesa-dev@lists.freedesktop.org

Build fails with the following errors:

[77/1193] Compiling C object
'src/compiler/nir/src@compiler@nir@@nir@sta/meson-generated_.._nir_intrinsics.c.o'.
src/compiler/nir/nir_intrinsics.c:2039:1: warning: excess elements in
array initializer
 {
 ^
src/compiler/nir/nir_intrinsics.c:2039:1: note: (near initialization
for ‘nir_intrinsic_infos’)
[123/1193] Compiling C object
'src/compiler/nir/src@compiler@nir@@nir@sta/nir_lower_system_values.c.o'.
FAILED:
src/compiler/nir/src@compiler@nir@@nir@sta/nir_lower_system_values.c.o 
cc -Isrc/compiler/nir/src@compiler@nir@@nir@sta -Isrc/compiler/nir
-I../mesa/src/compiler/nir -Isrc/../include -I../mesa/src/../include -Isrc
-I../mesa/src -Isrc/mapi -I../mesa/src/mapi -Isrc/mesa -I../mesa/src/mesa
-I../mesa/src/gallium/include -Isrc/gallium/auxiliary
-I../mesa/src/gallium/auxiliary -Isrc/compiler -I../mesa/src/compiler
-Isrc/compiler/nir/../spirv -I../mesa/src/compiler/nir/../spirv
-Isrc/compiler/spirv -fdiagnostics-color=always -DNDEBUG -pipe
-D_FILE_OFFSET_BITS=64 -std=c99 -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
-D__STDC_LIMIT_MACROS '-DVERSION="18.2.0-devel"' -DPACKAGE_VERSION=VERSION
'-DPACKAGE_BUGREPORT="https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa;'
-DGLX_USE_TLS -DHAVE_ST_VDPAU -DENABLE_ST_OMX_BELLAGIO=1
-DENABLE_ST_OMX_TIZONIA=0 -DHAVE_X11_PLATFORM -DGLX_INDIRECT_RENDERING
-DGLX_DIRECT_RENDERING -DGLX_USE_DRM -DHAVE_DRM_PLATFORM
-DHAVE_SURFACELESS_PLATFORM -DENABLE_SHADER_CACHE -DHAVE___BUILTIN_BSWAP32
-DHAVE___BUILTIN_BSWAP64 -DHAVE___BUILTIN_CLZ -DHAVE___BUILTIN_CLZLL
-DHAVE___BUILTIN_CTZ -DHAVE___BUILTIN_EXPECT -DHAVE___BUILTIN_FFS
-DHAVE___BUILTIN_FFSLL -DHAVE___BUILTIN_POPCOUNT -DHAVE___BUILTIN_POPCOUNTLL
-DHAVE___BUILTIN_UNREACHABLE -DHAVE_FUNC_ATTRIBUTE_CONST
-DHAVE_FUNC_ATTRIBUTE_FLATTEN -DHAVE_FUNC_ATTRIBUTE_MALLOC
-DHAVE_FUNC_ATTRIBUTE_PURE -DHAVE_FUNC_ATTRIBUTE_UNUSED
-DHAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT -DHAVE_FUNC_ATTRIBUTE_WEAK
-DHAVE_FUNC_ATTRIBUTE_FORMAT -DHAVE_FUNC_ATTRIBUTE_PACKED
-DHAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL -DHAVE_FUNC_ATTRIBUTE_VISIBILITY
-DHAVE_FUNC_ATTRIBUTE_ALIAS -DHAVE_FUNC_ATTRIBUTE_NORETURN -D_GNU_SOURCE
-DUSE_SSE41 -DUSE_GCC_ATOMIC_BUILTINS -DUSE_X86_64_ASM -DMAJOR_IN_SYSMACROS
-DHAVE_SYS_SYSCTL_H -DHAVE_LINUX_FUTEX_H -DHAVE_ENDIAN_H -DHAVE_STRTOF
-DHAVE_MKOSTEMP -DHAVE_POSIX_MEMALIGN -DHAVE_TIMESPEC_GET -DHAVE_MEMFD_CREATE
-DHAVE_STRTOD_L -DHAVE_DLADDR -DHAVE_DL_ITERATE_PHDR -DHAVE_ZLIB -DHAVE_PTHREAD
-DHAVE_LIBDRM -DHAVE_LLVM=0x0800 -DMESA_LLVM_VERSION_PATCH=0 -DUSE_LIBGLVND=1
-DHAVE_WAYLAND_PLATFORM -DWL_HIDE_DEPRECATED -DHAVE_DRI3 -DHAVE_DRI3_MODIFIERS
-DHAVE_GALLIUM_EXTRA_HUD=1 -DHAVE_LIBSENSORS=1 -Wall
-Werror=implicit-function-declaration -Werror=missing-prototypes
-fno-math-errno -fno-trapping-math -Wno-missing-field-initializers
-march=native -O3 -fno-stack-protector -fweb -fno-semantic-interposition
-fmerge-all-constants -floop-nest-optimize -fPIC -fvisibility=hidden
-Werror=pointer-arith -Werror=vla -Wno-override-init  -MD -MQ
'src/compiler/nir/src@compiler@nir@@nir@sta/nir_lower_system_values.c.o' -MF
'src/compiler/nir/src@compiler@nir@@nir@sta/nir_lower_system_values.c.o.d' -o
'src/compiler/nir/src@compiler@nir@@nir@sta/nir_lower_system_values.c.o' -c
../mesa/src/compiler/nir/nir_lower_system_values.c
../mesa/src/compiler/nir/nir_lower_system_values.c: In function
‘convert_block’:
../mesa/src/compiler/nir/nir_lower_system_values.c:153:28: error:
implicit declaration of function ‘nir_load_sample_id_no_per_sample’; did you
mean ‘nir_load_sample_pos’? [-Werror=implicit-function-declaration]
   
nir_load_sample_id_no_per_sample(b));
   
^~~~
   
nir_load_sample_pos
../mesa/src/compiler/nir/nir_lower_system_values.c:153:28: warning:
passing argument 3 of ‘nir_ishl’ makes pointer from integer without a cast
[-Wint-conversion]
   
nir_load_sample_id_no_per_sample(b));
   
^~~
In file included from ../mesa/src/compiler/nir/nir_builder.h:379,
 from

Re: [Mesa-dev] [PATCH v4] anv: add VK_EXT_sampler_filter_minmax support

2018-08-20 Thread Lionel Landwerlin

On 20/08/2018 17:29, Yunchao He wrote:

This extension can be supported on SKL+. With this patch,
all corresponding tests (6K+) in CTS can pass. No test fails.

I verified CTS with the command below:
deqp-vk --deqp-case=dEQP-VK.pipeline.sampler.view_type.*reduce*

v2: 1) support all depth formats, not just depth-only formats, 2) fix
wrong indentation (Jason).

v3: fix a few nits (Lionel).

v4: fix failures in CI: disable sampler reduction when sampler
reduction mode is not specified via this extension (Lionel).

Looks good to me :

Reviewed-by: Lionel Landwerlin 



---
  src/intel/vulkan/anv_device.c  |  8 
  src/intel/vulkan/anv_extensions.py |  1 +
  src/intel/vulkan/anv_formats.c |  6 ++
  src/intel/vulkan/genX_state.c  | 28 
  4 files changed, 43 insertions(+)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 04fd6a829e..e45ba4b3af 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1116,6 +1116,14 @@ void anv_GetPhysicalDeviceProperties2(
   break;
}
  
+  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {

+ VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
+(VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
+ properties->filterMinmaxImageComponentMapping = pdevice->info.gen >= 
9;
+ properties->filterMinmaxSingleComponentFormats = true;
+ break;
+  }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
   VkPhysicalDeviceSubgroupProperties *properties = (void *)ext;
  
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py

index ea837744b4..e165bd371d 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -125,6 +125,7 @@ EXTENSIONS = [
  Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen 
>= 9'),
  Extension('VK_EXT_vertex_attribute_divisor',  2, True),
  Extension('VK_EXT_post_depth_coverage',   1, 'device->info.gen 
>= 9'),
+Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen 
>= 9'),
  ]
  
  class VkVersion:

diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
index 815b320a82..33faf7cc37 100644
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -489,6 +489,9 @@ get_image_format_features(const struct gen_device_info 
*devinfo,
if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT || devinfo->gen >= 8)
   flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
  
+  if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && devinfo->gen >= 9)

+ flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT;
+
flags |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
 VK_FORMAT_FEATURE_BLIT_DST_BIT |
 VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
@@ -521,6 +524,9 @@ get_image_format_features(const struct gen_device_info 
*devinfo,
 if (isl_format_supports_sampling(devinfo, plane_format.isl_format)) {
flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
  
+  if (devinfo->gen >= 9)

+ flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT;
+
if (isl_format_supports_filtering(devinfo, plane_format.isl_format))
   flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
 }
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index b1014d9e79..2627851ed9 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -245,6 +245,14 @@ static const uint32_t vk_to_gen_shadow_compare_op[] = {
 [VK_COMPARE_OP_ALWAYS]   = PREFILTEROPNEVER,
  };
  
+#if GEN_GEN >= 9

+static const uint32_t vk_to_gen_sampler_reduction_mode[] = {
+   [VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT] = STD_FILTER,
+   [VK_SAMPLER_REDUCTION_MODE_MIN_EXT]  = MINIMUM,
+   [VK_SAMPLER_REDUCTION_MODE_MAX_EXT]  = MAXIMUM,
+};
+#endif
+
  VkResult genX(CreateSampler)(
  VkDevice_device,
  const VkSamplerCreateInfo*  pCreateInfo,
@@ -266,6 +274,11 @@ VkResult genX(CreateSampler)(
 uint32_t border_color_offset = device->border_colors.offset +
pCreateInfo->borderColor * 64;
  
+#if GEN_GEN >= 9

+   unsigned sampler_reduction_mode = STD_FILTER;
+   bool enable_sampler_reduction = false;
+#endif
+
 vk_foreach_struct(ext, pCreateInfo->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO: {
@@ -281,6 +294,16 @@ VkResult genX(CreateSampler)(
   sampler->conversion = conversion;
   break;
}
+#if GEN_GEN >= 9
+  case VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT: {
+ struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
+(struct 

Re: [Mesa-dev] [PATCH 06/22] nir/format_convert: Add vec mask and sign-extend helpers

2018-08-20 Thread Jason Ekstrand
On Mon, Aug 20, 2018 at 2:42 AM Kenneth Graunke 
wrote:

> On Friday, August 17, 2018 1:06:12 PM PDT Jason Ekstrand wrote:
> > ---
> >  src/compiler/nir/nir_format_convert.h | 35 +--
> >  1 file changed, 27 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir_format_convert.h
> b/src/compiler/nir/nir_format_convert.h
> > index b1345f7263b..305273cdfdd 100644
> > --- a/src/compiler/nir/nir_format_convert.h
> > +++ b/src/compiler/nir/nir_format_convert.h
> > @@ -50,6 +50,32 @@ nir_mask_shift_or(struct nir_builder *b, nir_ssa_def
> *dst, nir_ssa_def *src,
> > return nir_ior(b, nir_mask_shift(b, src, src_mask, src_left_shift),
> dst);
> >  }
> >
> > +static inline nir_ssa_def *
> > +nir_format_mask_uvec(nir_builder *b, nir_ssa_def *src,
> > + const unsigned *bits)
> > +{
> > +   nir_const_value mask;
> > +   for (unsigned i = 0; i < src->num_components; i++) {
> > +  assert(bits[i] < 32);
> > +  mask.u32[i] = (1u << bits[i]) - 1;
> > +   }
> > +   return nir_iand(b, src, nir_build_imm(b, src->num_components, 32,
> mask));
> > +}
> > +
> > +static inline nir_ssa_def *
> > +nir_format_sign_extend_ivec(nir_builder *b, nir_ssa_def *src,
> > +const unsigned *bits)
> > +{
> > +   assert(src->num_components <= 4);
> > +   nir_ssa_def *comps[4];
> > +   for (unsigned i = 0; i < src->num_components; i++) {
> > +  nir_ssa_def *shift = nir_imm_int(b, src->bit_size - bits[i]);
> > +  comps[i] = nir_ishr(b, nir_ishl(b, nir_channel(b, src, i),
> shift), shift);
> > +   }
> > +   return nir_vec(b, comps, src->num_components);
> > +}
> > +
> > +
> >  static inline nir_ssa_def *
> >  nir_format_unpack_int(nir_builder *b, nir_ssa_def *packed,
> >const unsigned *bits, unsigned num_components,
> > @@ -117,14 +143,7 @@ static inline nir_ssa_def *
> >  nir_format_pack_uint(nir_builder *b, nir_ssa_def *color,
> >   const unsigned *bits, unsigned num_components)
> >  {
> > -   nir_const_value mask;
> > -   for (unsigned i = 0; i < num_components; i++) {
>
> This used to operate on the num_components parameter to
> nir_format_pack_uint, but now it operates on color->num_components
> instead.  That's probably OK...do we even need the parameter?
>

RE masking, yes the new masking helper doesn't take a number of
components.  If the number of components in the source SSA def is too
small, you're toast; if it's too large, the optimizer will eventually throw
away the unneeded instructions.

For the packing, however, the number of components is needed since it
controls how much of the bits array you read and how much you try to cram
into the resulting uint32.

--Jason


> Nothing actually uses this function in master today AFAICT...
>
> Patches 1-3 (with Bas's fixes) and 5-7 are:
> Reviewed-by: Kenneth Graunke 
>
> > -  assert(bits[i] < 32);
> > -  mask.u32[i] = (1u << bits[i]) - 1;
> > -   }
> > -   nir_ssa_def *mask_imm = nir_build_imm(b, num_components, 32, mask);
> > -
> > -   return nir_format_pack_uint_unmasked(b, nir_iand(b, color, mask_imm),
> > +   return nir_format_pack_uint_unmasked(b, nir_format_mask_uvec(b,
> color, bits),
> >  bits, num_components);
> >  }
> >
> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv/lower_ycbcr: Use the binding array size for bounds checks

2018-08-20 Thread Lionel Landwerlin
From: Jason Ekstrand 

Because lower_ycbcr gets called before apply_pipeline_layout, the
indices are all logical and the binding layout HW size is actually too
big for the bounds check.  We should just use the regular logical array
size instead.

Fixes: f3e91e78a33 "anv: add nir lowering pass for ycbcr textures"
Reviewed-by: Timothy Arceri 
Reviewed-by: Lionel Landwerlin 
(cherry picked from commit 320dacb0a051cd1736e0976f70467b68281edfbf)
---
 src/intel/vulkan/anv_nir_lower_ycbcr_textures.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c 
b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
index ebf1fd9c267..e2b560364bc 100644
--- a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
+++ b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
@@ -337,18 +337,16 @@ try_lower_tex_ycbcr(struct anv_pipeline_layout *layout,
if (binding->immutable_samplers == NULL)
   return false;
 
-   unsigned texture_index = tex->texture_index;
+   assert(tex->texture_index == 0);
+   unsigned array_index = 0;
if (tex->texture->deref.child) {
   assert(tex->texture->deref.child->deref_type == nir_deref_type_array);
   nir_deref_array *deref_array = 
nir_deref_as_array(tex->texture->deref.child);
   if (deref_array->deref_array_type != nir_deref_array_type_direct)
  return false;
-  size_t hw_binding_size =
- anv_descriptor_set_binding_layout_get_hw_size(binding);
-  texture_index += MIN2(deref_array->base_offset, hw_binding_size - 1);
+  array_index = MIN2(deref_array->base_offset, binding->array_size - 1);
}
-   const struct anv_sampler *sampler =
-  binding->immutable_samplers[texture_index];
+   const struct anv_sampler *sampler = 
binding->immutable_samplers[array_index];
 
if (sampler->conversion == NULL)
   return false;
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105371] r600_shader_from_tgsi - GPR limit exceeded - shader requires 360 registers

2018-08-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105371

--- Comment #15 from amonpaike  ---
(In reply to Gert Wollny from comment #14)
> If by ESM error you refer to the overbright light blue artifact in the SPACE
> RACESHIP scene, then it is a driver problem. If you run 
> 
> R600_DEBUG=nosb blender 
> 

I've tested your suggestion, and actually the ESM shadows now work. Also other
less noticeable artifacts have disappeared ..

Note that performance drops when this shader compiler is disabled; I hope you
can find this bug.

thank you very much for your work

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/icl: Allow headerless sampler messages for pre-emptable contexts

2018-08-20 Thread Anuj Phogat
On Mon, Aug 20, 2018 at 12:18 AM Kenneth Graunke  wrote:
>
> On Friday, August 17, 2018 5:13:25 PM PDT Anuj Phogat wrote:
> > It fixes simulator warnings in piglit tests complaining about missing
> > support for headerless sampler messages for pre-emptable contexts.
> > Bit 5 in SAMPLER MODE register is newly introduced for ICLLP.
> >
> > Signed-off-by: Anuj Phogat 
> > ---
> >  src/mesa/drivers/dri/i965/brw_defines.h  |  4 
> >  src/mesa/drivers/dri/i965/brw_state_upload.c | 11 +++
> >  2 files changed, 15 insertions(+)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> > b/src/mesa/drivers/dri/i965/brw_defines.h
> > index 855f1c7d744..433314115b1 100644
> > --- a/src/mesa/drivers/dri/i965/brw_defines.h
> > +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> > @@ -1673,4 +1673,8 @@ enum brw_pixel_shader_coverage_mask_mode {
> >  # define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7)
> >  # define GLK_SCEC_BARRIER_MODE_MASKREG_MASK(1 << 7)
> >
> > +#define GEN11_SAMPLER_MODE  0xE18C
> > +# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS(1 << 5)
> > +# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK   REG_MASK(1 << 
> > 5)
> > +
> >  #endif
> > diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
> > b/src/mesa/drivers/dri/i965/brw_state_upload.c
> > index 757426407c3..5a334b48892 100644
> > --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> > +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> > @@ -63,6 +63,17 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
> >
> > brw_upload_invariant_state(brw);
> >
> > +   if (devinfo->gen == 11) {
> > +  /*  The default behavior of bit 5 "Headerless Message for 
> > Pre-emptable
> > +   *  Contexts" in SAMPLER MODE register is set to 0, which means
> > +   *  headerless sampler messages are not allowed for pre-emptable
> > +   *  contexts. Set the bit 5 to 1 to allow them.
>
> Bonus space after the stars.  Can we also change this to:
>
>* contexts.  Set bit 5 to allow them.
>
> Same for the anv patch.  Either way, both are:
> Reviewed-by: Kenneth Graunke 
>
Fixed locally. Thanks.
> I don't know if people are trying to enable pre-emption during GPGPU
> work on pre-Gen11.  If so, that probably will not work, and we'd either
> need to avoid headerless messages (gross) or disable preemption (maybe
> also bad...)
>
Why do you think it'll be a problem for Pre-Gen11? I don't see a bit disallowing
preemption+headerless message for pre-Gen11.

> > +   */
> > +  brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
> > +  
> > HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
> > +  HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);
> > +   }
> > +
> > if (devinfo->gen == 10 || devinfo->gen == 11) {
> >/* From gen10 workaround table in h/w specs:
> > *
> >
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 00/11] nir: Add some optimizations on variables

2018-08-20 Thread Jason Ekstrand
ping

On Sun, Jul 29, 2018 at 12:44 AM Jason Ekstrand 
wrote:

> This is the second version of my optimization series for variables.  The
> first version, including the very descriptive cover letter can be found
> here:
>
> https://patchwork.freedesktop.org/series/47295/
>
> This second version contains changes to the array splitting and vector
> narrowing passes.  The array splitting pass is just cleaned up a bit to
> be more understandable and match the other passes better.  The vector
> narrowing pass is renamed to nir_shrink_vec_array_vars and is improved to
> also be able to shrink over-sized arrays.
>
> This series can be found on my gitlab here:
>
> https://gitlab.freedesktop.org/jekstrand/mesa/commits/wip/nir-var-opts
>
> Cc: Timothy Arceri 
> Cc: Caio Marcelo de Oliveira Filho 
>
> Jason Ekstrand (11):
>   util/list: Make some helpers take const lists
>   nir: Take if uses into account in ssa_def_components_read
>   nir/instr_set: Fix nir_instrs_equal for derefs
>   nir/types: Add array_or_matrix helpers
>   nir: Add a structure splitting pass
>   nir: Add an array splitting pass
>   intel/nir: Use the new structure and array splitting passes
>   nir: Add a array-of-vector variable shrinking pass
>   intel/nir: Use nir_shrink_vec_array_vars
>   nir: Add an array copy optimization
>   intel/nir: Enable nir_opt_find_array_copies
>
>  src/compiler/Makefile.sources|2 +
>  src/compiler/nir/meson.build |2 +
>  src/compiler/nir/nir.c   |3 +
>  src/compiler/nir/nir.h   |5 +
>  src/compiler/nir/nir_instr_set.c |4 +-
>  src/compiler/nir/nir_opt_find_array_copies.c |  383 +
>  src/compiler/nir/nir_split_vars.c| 1548 ++
>  src/compiler/nir_types.cpp   |   15 +
>  src/compiler/nir_types.h |2 +
>  src/intel/compiler/brw_nir.c |   19 +-
>  src/intel/compiler/brw_nir.h |3 +-
>  src/util/list.h  |8 +-
>  12 files changed, 1980 insertions(+), 14 deletions(-)
>  create mode 100644 src/compiler/nir/nir_opt_find_array_copies.c
>  create mode 100644 src/compiler/nir/nir_split_vars.c
>
> --
> 2.17.1
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] intel/eu: print bytes instead of 32 bit hex value

2018-08-20 Thread Sagar Ghuge
INTEL_DEBUG=hex prints 32-bit hex values, and due to the endianness of the CPU
the byte order is reversed. In order to disassemble binary files, print
each byte instead of a 32-bit hex value.

v2: Print blank spaces in order to vertically align output of compacted
instructions hex value with uncompacted instructions hex value.
(Matt Turner)

Signed-off-by: Sagar Ghuge 
---
 src/intel/compiler/brw_eu.c | 48 -
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c
index 6ef0a6a577..ab87ae90e1 100644
--- a/src/intel/compiler/brw_eu.c
+++ b/src/intel/compiler/brw_eu.c
@@ -364,24 +364,38 @@ brw_disassemble(const struct gen_device_info *devinfo,
 
   if (compacted) {
  brw_compact_inst *compacted = (void *)insn;
-if (dump_hex) {
-   fprintf(out, "0x%08x 0x%08x   ",
-   ((uint32_t *)insn)[1],
-   ((uint32_t *)insn)[0]);
-}
-
-brw_uncompact_instruction(devinfo, &uncompacted, compacted);
-insn = &uncompacted;
-offset += 8;
+ if (dump_hex) {
+unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
+const unsigned int blank_spaces = 24;
+for (int i = 0 ; i < 8; i = i + 4) {
+   fprintf(out, "%02x %02x %02x %02x ",
+   insn_ptr[i],
+   insn_ptr[i + 1],
+   insn_ptr[i + 2],
+   insn_ptr[i + 3]);
+}
+/* Make compacted instructions hex value output
+ * vertically aligned with uncompacted instructions
+ * hex value
+ */
+fprintf(out, "%*c", blank_spaces, ' ');
+ }
+
+ brw_uncompact_instruction(devinfo, &uncompacted, compacted);
+ insn = &uncompacted;
+ offset += 8;
   } else {
-if (dump_hex) {
-   fprintf(out, "0x%08x 0x%08x 0x%08x 0x%08x ",
-   ((uint32_t *)insn)[3],
-   ((uint32_t *)insn)[2],
-   ((uint32_t *)insn)[1],
-   ((uint32_t *)insn)[0]);
-}
-offset += 16;
+ if (dump_hex) {
+unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
+for (int i = 0 ; i < 16; i = i + 4) {
+   fprintf(out, "%02x %02x %02x %02x ",
+   insn_ptr[i],
+   insn_ptr[i + 1],
+   insn_ptr[i + 2],
+   insn_ptr[i + 3]);
+}
+ }
+ offset += 16;
   }
 
   brw_disassemble_inst(out, devinfo, insn, compacted);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/icl: Allow headerless sampler messages for pre-emptable contexts

2018-08-20 Thread Kenneth Graunke
On Monday, August 20, 2018 10:26:29 AM PDT Anuj Phogat wrote:
> On Mon, Aug 20, 2018 at 12:18 AM Kenneth Graunke wrote:
[snip]
> > I don't know if people are trying to enable pre-emption during GPGPU
> > work on pre-Gen11.  If so, that probably will not work, and we'd either
> > need to avoid headerless messages (gross) or disable preemption (maybe
> > also bad...)
> >
> Why do you think it'll be a problem for Pre-Gen11? I don't see a bit disallowing
> preemption+headerless message for pre-Gen11.

What's the more likely scenario?

Timeline A:

1. Preemption support was newly added; it didn't work with headerless
2. Somebody realized it was important and added a fix to make them work,
   but added it as a chicken bit that requires manual intervention.
3. They enabled the chicken bit by default, so things work right away.
4. They remove the chicken bit; the hardware just works.

Timeline B:

1. Preemption support was added and headerless works perfectly
2. Someone breaks headerless but adds a chicken bit to work around it
3. Someone enables the fix by default
4. They remove the chicken bit; the hardware just works.

IMHO, the first scenario is the most likely sequence of events.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radv: place pointer length into cache uuid

2018-08-20 Thread Bas Nieuwenhuizen
On Mon, Aug 20, 2018 at 11:32 PM, Grazvydas Ignotas  wrote:
> Thanks to reproducible builds, binary file timestamps may be identical
> for both 32bit and 64bit packages when built from the same source.
> This means radv will use the same cache for both 32 and 64 bit
> processes, which leads to crashes.
>
> Conveniently there is a spare byte in cache_uuid, let's place the
> pointer size there.
>
> Fixes: f4e499ec79 "radv: add initial non-conformant radv vulkan driver"
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107601
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105904

Maybe an explicit

CC: 18.1 18.2 

?

> ---
>  src/amd/vulkan/radv_device.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index cc88abb57a8..79dbbd886d5 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -58,11 +58,11 @@ radv_device_get_cache_uuid(enum radeon_family family, 
> void *uuid)
> return -1;
>
> memcpy(uuid, &mesa_timestamp, 4);
> memcpy((char*)uuid + 4, &llvm_timestamp, 4);
> memcpy((char*)uuid + 8, &f, 2);
> -   snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
> +   snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv%zd", sizeof(void 
> *));
> return 0;
>  }
>
>  static void
>  radv_get_driver_uuid(void *uuid)
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] compiler: add SYSTEM_VALUE_VARYING_COORD

2018-08-20 Thread Bas Nieuwenhuizen
On Tue, Aug 21, 2018 at 12:38 AM, Marek Olšák  wrote:
> On Fri, Aug 10, 2018 at 9:26 AM Rob Clark  wrote:
>>
>> Used internally in freedreno/ir3 for the vec2 value that hw passes to
>> shader to use as coordinate for bary.f (varying fetch) instruction.
>> This is not the same as SYSTEM_VALUE_FRAG_COORD.
>>
>> Signed-off-by: Rob Clark 
>> ---
>> Up until now, we'd been hard-coding the location of this value (ie. to
>> r0.xy), mostly because originally in the early a3xx days I didn't know
>> which bits could configure this value (blob was always using r0.xy so
>> in cmdstream traces it always showed up as 0's).
>>
>> But starting with a6xx, the address register aliases r0.x, which kinda
>> throws a monkey-wrench in the existing scheme of hard-coding.  The good
>> news is that I know the bits to configure this value for a3xx-a6xx.
>>
>> So I'm shifting over to handling this like a sysval.
>>
>>  src/compiler/shader_enums.c| 1 +
>>  src/compiler/shader_enums.h| 6 ++
>>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
>>  3 files changed, 8 insertions(+)
>>
>> diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
>> index a874083a0b7..0210b503d3f 100644
>> --- a/src/compiler/shader_enums.c
>> +++ b/src/compiler/shader_enums.c
>> @@ -244,6 +244,7 @@ gl_system_value_name(gl_system_value sysval)
>>   ENUM(SYSTEM_VALUE_DEVICE_INDEX),
>>   ENUM(SYSTEM_VALUE_VIEW_INDEX),
>>   ENUM(SYSTEM_VALUE_VERTEX_CNT),
>> + ENUM(SYSTEM_VALUE_VARYING_COORD),
>> };
>> STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
>> return NAME(sysval);
>> diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
>> index f8e22925f35..5c36f55283c 100644
>> --- a/src/compiler/shader_enums.h
>> +++ b/src/compiler/shader_enums.h
>> @@ -601,6 +601,12 @@ typedef enum
>>  */
>> SYSTEM_VALUE_VERTEX_CNT,
>>
>> +   /**
>> +* Driver internal varying-coord, used for varying-fetch instructions.
>> +* Not externally visible.
>> +*/
>
> Can you improve the documentation, so that mere mortals understand
> what it means? Does it correspond to something in AMD hw?

I'd expect this to correspond to stuff like PERSP_CENTER on AMD?

Which begs the question, does this need distinguishing on
center/centroid/per-sample?

- Bas
>
> Marek
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: Split out PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE.

2018-08-20 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek
On Mon, Aug 13, 2018 at 1:13 AM Kenneth Graunke  wrote:
>
> Some hardware can do PIPE_TEX_WRAP_MIRROR_REPEAT but not
> PIPE_TEX_WRAP_MIRROR_CLAMP and PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER.
>
> Drivers for such hardware would like to advertise support for
> ARB_texture_mirror_clamp_to_edge but not EXT_texture_mirror_clamp.
>
> This commit adds a new PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE bit,
> changes the extension enable to be based on that, and enables it
> in all upstream drivers which supported PIPE_CAP_TEXTURE_MIRROR_CLAMP
> (so they continue supporting this mode).
> ---
>  src/gallium/docs/source/screen.rst   | 6 --
>  src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
>  src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
>  src/gallium/drivers/i915/i915_screen.c   | 1 +
>  src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
>  src/gallium/drivers/r300/r300_screen.c   | 1 +
>  src/gallium/drivers/r600/r600_pipe.c | 1 +
>  src/gallium/drivers/radeonsi/si_get.c| 1 +
>  src/gallium/drivers/softpipe/sp_screen.c | 1 +
>  src/gallium/drivers/svga/svga_screen.c   | 1 +
>  src/gallium/drivers/swr/swr_screen.cpp   | 1 +
>  src/gallium/drivers/v3d/v3d_screen.c | 1 +
>  src/gallium/drivers/vc4/vc4_screen.c | 1 +
>  src/gallium/drivers/virgl/virgl_screen.c | 1 +
>  src/gallium/include/pipe/p_defines.h | 1 +
>  src/mesa/state_tracker/st_extensions.c   | 2 +-
>  19 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/docs/source/screen.rst 
> b/src/gallium/docs/source/screen.rst
> index e85246c4778..f4484c7db81 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -40,8 +40,10 @@ The integer capabilities:
>for a 3D texture.
>  * ``PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS``: The maximum number of mipmap levels 
> available
>for a cubemap.
> -* ``PIPE_CAP_TEXTURE_MIRROR_CLAMP``: Whether mirrored texture coordinates 
> with clamp
> -  are supported.
> +* ``PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE``: Whether mirrored texture 
> coordinates are
> +  supported with the clamp-to-edge wrap mode.
> +* ``PIPE_CAP_TEXTURE_MIRROR_CLAMP``: Whether mirrored texture coordinates 
> are supported
> +  with clamp or clamp-to-border wrap modes.
>  * ``PIPE_CAP_BLEND_EQUATION_SEPARATE``: Whether alpha blend equations may be 
> different
>from color blend equations, in :ref:`Blend` state.
>  * ``PIPE_CAP_SM3``: Whether the vertex shader and fragment shader support 
> equivalent
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
> b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> index 35707e60445..f57e2e8b57e 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> @@ -181,6 +181,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: /* no dual-source supported 
> */
> case PIPE_CAP_TEXTURE_MULTISAMPLE: /* no texture multisample */
> case PIPE_CAP_TEXTURE_MIRROR_CLAMP: /* only mirrored repeat */
> +   case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: /* only mirrored repeat */
> case PIPE_CAP_INDEP_BLEND_ENABLE:
> case PIPE_CAP_INDEP_BLEND_FUNC:
> case PIPE_CAP_DEPTH_CLIP_DISABLE:
> diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
> b/src/gallium/drivers/freedreno/freedreno_screen.c
> index 4d54446ec7e..7d91a940817 100644
> --- a/src/gallium/drivers/freedreno/freedreno_screen.c
> +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
> @@ -211,6 +211,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_TGSI_TEXCOORD:
> case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
> case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
> +   case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
> case PIPE_CAP_QUERY_MEMORY_INFO:
> case PIPE_CAP_PCI_GROUP:
> case PIPE_CAP_PCI_BUS:
> diff --git a/src/gallium/drivers/i915/i915_screen.c 
> b/src/gallium/drivers/i915/i915_screen.c
> index dda7c5baee5..e63115c0cb0 100644
> --- a/src/gallium/drivers/i915/i915_screen.c
> +++ b/src/gallium/drivers/i915/i915_screen.c
> @@ -214,6 +214,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
> cap)
> case PIPE_CAP_INDEP_BLEND_FUNC:
> case PIPE_CAP_SHADER_STENCIL_EXPORT:
> case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
> +   case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
> case PIPE_CAP_TEXTURE_SWIZZLE:
> case PIPE_CAP_QUERY_TIME_ELAPSED:
> case PIPE_CAP_SM3:
> diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
> b/src/gallium/drivers/llvmpipe/lp_screen.c
> index 261bca907a6..d749bd041d6 100644
> 

Re: [Mesa-dev] [PATCH] llvmpipe: add cc clobber to inline asm

2018-08-20 Thread Roland Scheidegger
Am 20.08.2018 um 23:31 schrieb Grazvydas Ignotas:
> The bsr instruction modifies flags, so that needs to be indicated to the
> compiler. No effect on generated code, but still needed for correctness.
> ---
>  src/gallium/drivers/llvmpipe/lp_setup_tri.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
> b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
> index cec6198ec63..1852ec05d56 100644
> --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
> +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
> @@ -732,11 +732,12 @@ floor_pot(uint32_t n)
> if (n == 0)
>return 0;
>  
> __asm__("bsr %1,%0"
>: "=r" (n)
> -  : "rm" (n));
> +  : "rm" (n)
> +  : "cc");
> return 1 << n;
>  #else
> n |= (n >>  1);
> n |= (n >>  2);
> n |= (n >>  4);
> 

Looks alright (although my inline asm is a bit rusty), although I wonder
if maybe floor_pot() should use util_logbase2? Though it's not quite an
exact fit.

Or we could use __builtin_clz directly there based on HAVE___BUILTIN_CLZ.

As a side note, it actually seems tricky to get gcc to emit the
"correct" trivial sequence (tested with version 7.3.1).
If you do
int val = 1 << (31 - __builtin_clz(in));
it emits (-O3)
  bsr%eax,%eax
  mov$0x1f,%ecx
  xor$0x1f,%eax
  sub%eax,%ecx
  mov$0x1,%eax
  shl%cl,%eax
which isn't the end of the world, but it is quite an optimization failure.

with -O3 -march=haswell it will figure it out:
  bsr%eax,%edx
  mov$0x1,%eax
  shlx   %edx,%eax,%eax

If you think you're clever and instead do
int val = 1 << (__builtin_clz(in) ^ 31);
(which is really the same thing)
gcc now is happy with -O3
  bsr%eax,%ecx
  mov$0x1,%eax
  shl%cl,%eax
Naturally, the sub is gone, and gcc recognized the xor 31 on top of its
own xor 31 for the lzcnt emulation cancel each other out.

but with -O3 -march=haswell it's a bit suboptimal now:
  mov$0x1,%edx
  lzcnt  %eax,%eax
  xor$0x1f,%eax
  shlx   %eax,%edx,%eax

So optimization is quite funny here, depending on if the cpu can do
lzcnt or just bsr. Fun stuff...

In any case,
Reviewed-by: Roland Scheidegger 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] compiler: add SYSTEM_VALUE_VARYING_COORD

2018-08-20 Thread Marek Olšák
On Mon, Aug 20, 2018 at 7:06 PM Rob Clark  wrote:
>
> On Mon, Aug 20, 2018 at 6:54 PM Bas Nieuwenhuizen
>  wrote:
> >
> > On Tue, Aug 21, 2018 at 12:38 AM, Marek Olšák  wrote:
> > > On Fri, Aug 10, 2018 at 9:26 AM Rob Clark  wrote:
> > >>
> > >> Used internally in freedreno/ir3 for the vec2 value that hw passes to
> > >> shader to use as coordinate for bary.f (varying fetch) instruction.
> > >> This is not the same as SYSTEM_VALUE_FRAG_COORD.
> > >>
> > >> Signed-off-by: Rob Clark 
> > >> ---
> > >> Up until now, we'd been hard-coding the location of this value (ie. to
> > >> r0.xy), mostly because originally in the early a3xx days I didn't know
> > >> which bits could configure this value (blob was always using r0.xy so
> > >> in cmdstream traces it always showed up as 0's).
> > >>
> > >> But starting with a6xx, the address register aliases r0.x, which kinda
> > >> throws a monkey-wrench in the existing scheme of hard-coding.  The good
> > >> news is that I know the bits to configure this value for a3xx-a6xx.
> > >>
> > >> So I'm shifting over to handling this like a sysval.
> > >>
> > >>  src/compiler/shader_enums.c| 1 +
> > >>  src/compiler/shader_enums.h| 6 ++
> > >>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
> > >>  3 files changed, 8 insertions(+)
> > >>
> > >> diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
> > >> index a874083a0b7..0210b503d3f 100644
> > >> --- a/src/compiler/shader_enums.c
> > >> +++ b/src/compiler/shader_enums.c
> > >> @@ -244,6 +244,7 @@ gl_system_value_name(gl_system_value sysval)
> > >>   ENUM(SYSTEM_VALUE_DEVICE_INDEX),
> > >>   ENUM(SYSTEM_VALUE_VIEW_INDEX),
> > >>   ENUM(SYSTEM_VALUE_VERTEX_CNT),
> > >> + ENUM(SYSTEM_VALUE_VARYING_COORD),
> > >> };
> > >> STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
> > >> return NAME(sysval);
> > >> diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
> > >> index f8e22925f35..5c36f55283c 100644
> > >> --- a/src/compiler/shader_enums.h
> > >> +++ b/src/compiler/shader_enums.h
> > >> @@ -601,6 +601,12 @@ typedef enum
> > >>  */
> > >> SYSTEM_VALUE_VERTEX_CNT,
> > >>
> > >> +   /**
> > >> +* Driver internal varying-coord, used for varying-fetch 
> > >> instructions.
> > >> +* Not externally visible.
> > >> +*/
> > >
> > > Can you improve the documentation, so that mere mortals understand
> > > what it means? Does it correspond to something in AMD hw?
> >
> > I'd expect this to correspond to stuff like PERSP_CENTER on AMD?
> >
> > Which begs the question, does this need distinguishing on
> > center/centroid/per-sample?
>
> so, adreno, being gles hw, has just "smooth" and "flat" varyings..
>
> that said, I've been kinda thinking of this as an opaque driver
> specific sysval without paying too much attention to what the value
> actually is other than "the thing you pass to bary.f to get non-flat
> varyings"..  I confess to not having looked into how this works on
> other hw..

Given what you said, the variable contains barycentric coordinates
(i,j) for perspective interpolation at the pixel center. It's the same
as "vec2 gl_BaryCoordSmoothAMD;" from
GL_AMD_shader_explicit_vertex_parameter.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] vulkan/wsi: fix pointer-integer conversion warnings

2018-08-20 Thread Grazvydas Ignotas
For 32bit build. Trivial.
---
 src/vulkan/wsi/wsi_common_display.c | 4 ++--
 src/vulkan/wsi/wsi_common_x11.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/vulkan/wsi/wsi_common_display.c 
b/src/vulkan/wsi/wsi_common_display.c
index e6cba188dfa..1e90bba460c 100644
--- a/src/vulkan/wsi/wsi_common_display.c
+++ b/src/vulkan/wsi/wsi_common_display.c
@@ -1515,11 +1515,11 @@ wsi_register_vblank_event(struct wsi_display_fence 
*fence,
for (;;) {
   int ret = drmCrtcQueueSequence(wsi->fd, connector->crtc_id,
  flags,
  frame_requested,
  frame_queued,
- (uint64_t) fence);
+ (uintptr_t) fence);
 
   if (!ret)
  return VK_SUCCESS;
 
   if (errno != ENOMEM) {
@@ -2340,11 +2340,11 @@ wsi_get_randr_output_display(VkPhysicalDevice 
physical_device,
   wsi_display_get_output(wsi_device, connection, (xcb_randr_output_t) 
output);
 
if (connector)
   *display = wsi_display_connector_to_handle(connector);
else
-  *display = NULL;
+  *display = VK_NULL_HANDLE;
return VK_SUCCESS;
 }
 
 #endif
 
diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
index 7b930884b47..aaa4d1e658e 100644
--- a/src/vulkan/wsi/wsi_common_x11.c
+++ b/src/vulkan/wsi/wsi_common_x11.c
@@ -1323,11 +1323,11 @@ x11_surface_create_swapchain(VkIcdSurfaceBase 
*icd_surface,
 * last completion mode, to ensure we don't get into reallocation
 * cycles. If we are starting anew, we set 'COPY', as that is the only
 * mode which provokes reallocation when anything changes, to make
 * sure we have the most optimal allocation.
 */
-   struct x11_swapchain *old_chain = (void *) pCreateInfo->oldSwapchain;
+   struct x11_swapchain *old_chain = (void *)(intptr_t) 
pCreateInfo->oldSwapchain;
if (old_chain)
   chain->last_present_mode = old_chain->last_present_mode;
else
   chain->last_present_mode = XCB_PRESENT_COMPLETE_MODE_COPY;
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] compiler: add SYSTEM_VALUE_VARYING_COORD

2018-08-20 Thread Marek Olšák
On Fri, Aug 10, 2018 at 9:26 AM Rob Clark  wrote:
>
> Used internally in freedreno/ir3 for the vec2 value that hw passes to
> shader to use as coordinate for bary.f (varying fetch) instruction.
> This is not the same as SYSTEM_VALUE_FRAG_COORD.
>
> Signed-off-by: Rob Clark 
> ---
> Up until now, we'd been hard-coding the location of this value (ie. to
> r0.xy), mostly because originally in the early a3xx days I didn't know
> which bits could configure this value (blob was always using r0.xy so
> in cmdstream traces it always showed up as 0's).
>
> But starting with a6xx, the address register aliases r0.x, which kinda
> throws a monkey-wrench in the existing scheme of hard-coding.  The good
> news is that I know the bits to configure this value for a3xx-a6xx.
>
> So I'm shifting over to handling this like a sysval.
>
>  src/compiler/shader_enums.c| 1 +
>  src/compiler/shader_enums.h| 6 ++
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
>  3 files changed, 8 insertions(+)
>
> diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
> index a874083a0b7..0210b503d3f 100644
> --- a/src/compiler/shader_enums.c
> +++ b/src/compiler/shader_enums.c
> @@ -244,6 +244,7 @@ gl_system_value_name(gl_system_value sysval)
>   ENUM(SYSTEM_VALUE_DEVICE_INDEX),
>   ENUM(SYSTEM_VALUE_VIEW_INDEX),
>   ENUM(SYSTEM_VALUE_VERTEX_CNT),
> + ENUM(SYSTEM_VALUE_VARYING_COORD),
> };
> STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
> return NAME(sysval);
> diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
> index f8e22925f35..5c36f55283c 100644
> --- a/src/compiler/shader_enums.h
> +++ b/src/compiler/shader_enums.h
> @@ -601,6 +601,12 @@ typedef enum
>  */
> SYSTEM_VALUE_VERTEX_CNT,
>
> +   /**
> +* Driver internal varying-coord, used for varying-fetch instructions.
> +* Not externally visible.
> +*/

Can you improve the documentation, so that mere mortals understand
what it means? Does it correspond to something in AMD hw?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/13] glsl: Add built-in functions for NV_shader_atomic_float

2018-08-20 Thread Caio Marcelo de Oliveira Filho
> > Question: why some builtins care about "supported" while other care
> > about the extension being "enabled"?
> 
> There are actually two different things happening.  In the cases where
> we only care about the extension being supported, we're creating a
> hidden intrinsic function.  These are used internally by the compiler to
> implement various features.  In the cases where we care about the
> extension being enabled, we're creating the user-visible function.
> Usually the user-visible function is implemented by calling the
> intrinsic.  This two level approach is used so that we can more cleanly
> support cases where multiple extensions have functions with slightly
> different names that do the same thing.  atomicCounterMaxARB and
> atomicCounterMax, for example, are both implemented using
> __intrinsic_atomic_max.

Thanks for the explanation!




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC][PATCH 0/5] mesa: Add types for AMD_depth_clamp_separate.

2018-08-20 Thread Marek Olšák
I can try to test the extension with the radeonsi driver. Do you have
a Mesa branch with the final patches?

Marek

On Mon, Aug 13, 2018 at 5:35 PM Sagar Ghuge  wrote:
>
> Hi everyone,
>
> I am kind of stuck on this part actually. I don't have
> latest AMD graphics card to test following behavior which
> Ian and Marek suggested me.
>
> I have written a piglit test :
> https://gitlab.freedesktop.org/sagarghuge/piglit/blob/320b91ffb131b380f1d27d9c05ab141e0cd9e557/tests/spec/amd_depth_clamp_separate/depth_clamp_get_test.c
>
> It would be great if someone can help me or test it in their
> spare time on latest AMD graphics card and provide some input
> on the extension behavior on AMD's closed source driver.
>
>
> On 08/09/2018 01:11 PM, Marek Olšák wrote:
> > On Thu, Aug 2, 2018 at 2:44 PM, Ian Romanick  wrote:
> >> On 08/02/2018 11:30 AM, Ian Romanick wrote:
> >>> On 08/01/2018 08:31 PM, Sagar Ghuge wrote:
>  Add some basic types and storage for the
>  AMD_depth_clamp_separate extension.
> >>
> >> I mentioned this on patch 5, but you should word wrap the commit message
> >> to 70 or 72 columns.
> >>
> >> More substantive comments are below...
> >>
>  Signed-off-by: Sagar Ghuge 
>  ---
>   include/GL/glcorearb.h   | 2 ++
>   src/mesa/main/extensions_table.h | 1 +
>   src/mesa/main/mtypes.h   | 9 +
>   3 files changed, 12 insertions(+)
> 
>  diff --git a/include/GL/glcorearb.h b/include/GL/glcorearb.h
>  index a78bbb6e18..d73ca5a8df 100644
>  --- a/include/GL/glcorearb.h
>  +++ b/include/GL/glcorearb.h
>  @@ -1558,6 +1558,8 @@ typedef int64_t GLint64;
>   #define GL_MAX_FRAGMENT_INPUT_COMPONENTS  0x9125
>   #define GL_CONTEXT_PROFILE_MASK   0x9126
>   #define GL_DEPTH_CLAMP0x864F
>  +#define GL_DEPTH_CLAMP_NEAR_AMD   0x901E
>  +#define GL_DEPTH_CLAMP_FAR_AMD0x901F
>   #define GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION 0x8E4C
>   #define GL_FIRST_VERTEX_CONVENTION0x8E4D
>   #define GL_LAST_VERTEX_CONVENTION 0x8E4E
> >>>
> >>> We should just import the updated versions of the Khronos headers.  I
> >>> think Marek sent out a patch to do this.  Does that work?
> >>>
>  diff --git a/src/mesa/main/extensions_table.h 
>  b/src/mesa/main/extensions_table.h
>  index 3f01896cae..8dc668e087 100644
>  --- a/src/mesa/main/extensions_table.h
>  +++ b/src/mesa/main/extensions_table.h
>  @@ -9,6 +9,7 @@
>   EXT(3DFX_texture_compression_FXT1   , 
>  TDFX_texture_compression_FXT1  , GLL, GLC,  x ,  x , 1999)
> 
>   EXT(AMD_conservative_depth  , ARB_conservative_depth
>   , GLL, GLC,  x ,  x , 2009)
>  +EXT(AMD_depth_clamp_separate, AMD_depth_clamp_separate  
>   ,  x , GLC,  x ,  x , 2009)
>   EXT(AMD_draw_buffers_blend  , ARB_draw_buffers_blend
>   , GLL, GLC,  x ,  x , 2009)
>   EXT(AMD_performance_monitor , AMD_performance_monitor   
>   , GLL, GLC,  x , ES2, 2007)
>   EXT(AMD_pinned_memory   , AMD_pinned_memory 
>   , GLL, GLC,  x ,  x , 2013)
>  diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
>  index d71872835d..406746a84c 100644
>  --- a/src/mesa/main/mtypes.h
>  +++ b/src/mesa/main/mtypes.h
>  @@ -1280,6 +1280,8 @@ struct gl_transform_attrib
>  GLboolean RescaleNormals;/**< 
>  GL_EXT_rescale_normal */
>  GLboolean RasterPositionUnclipped;   /**< 
>  GL_IBM_rasterpos_clip */
>  GLboolean DepthClamp;/**< GL_ARB_depth_clamp */
>  +   GLboolean DepthClampNear;/**< 
>  GL_AMD_depth_clamp_separate */
>  +   GLboolean DepthClampFar; /**< 
>  GL_AMD_depth_clamp_separate */
> >>
> >> I think we actually need two more flags here: _DepthClampNear and
> >> _DepthClampFar.  The spec is a little unclear, so you may need to test
> >> on some AMD closed-source drivers.  Specifically, the spec says
> >>
> >> "In addition to DEPTH_CLAMP_NEAR_AMD and DEPTH_CLAMP_FAR_AMD, the
> >> token DEPTH_CLAMP may be used to simultaneously enable or disable
> >> depth clamping at both the near and far planes."
> >>
> >> Based on that, I'm not sure what you're supposed to get if you do:
> >>
> >> glDisable(GL_DEPTH_CLAMP_NEAR_AMD);
> >> glEnable(GL_DEPTH_CLAMP);
> >> glGetIntegerv(GL_DEPTH_CLAMP_NEAR_AMD, &v);
> >>
> >> Should v contain GL_TRUE or GL_FALSE?  It seems clear that rendering
> >> should have the near plane clamped.
> >>
> >> Depending on the results of testing on AMD drivers, we either need
> >> enable / disable of GL_DEPTH_CLAMP to set / reset
> >> gl_transform_attrib::DepthClampNear and
> >> 

Re: [Mesa-dev] [PATCH 4/4] amd/addrlib: mark returnCode as MAYBE_UNUSED in ElemGetExportNorm

2018-08-20 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek
On Fri, Aug 17, 2018 at 10:41 AM Kai Wasserbäch
 wrote:
>
> Only used, when asserts are enabled.
>
> Fixes an unused-but-set-variable warning with GCC 8:
>  ../../../src/amd/addrlib/addrinterface.cpp: In function 'int 
> ElemGetExportNorm(ADDR_HANDLE, const ELEM_GETEXPORTNORM_INPUT*)':
>  ../../../src/amd/addrlib/addrinterface.cpp:835:23: warning: variable 
> 'returnCode' set but not used [-Wunused-but-set-variable]
>   ADDR_E_RETURNCODE returnCode = ADDR_OK;
> ^~
>
> Signed-off-by: Kai Wasserbäch 
> ---
>  src/amd/Makefile.addrlib.am   | 1 +
>  src/amd/addrlib/addrinterface.cpp | 4 +++-
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/Makefile.addrlib.am b/src/amd/Makefile.addrlib.am
> index 75ff7fbcf7..af0daf907d 100644
> --- a/src/amd/Makefile.addrlib.am
> +++ b/src/amd/Makefile.addrlib.am
> @@ -24,6 +24,7 @@ ADDRLIB_LIBS = addrlib/libamdgpu_addrlib.la
>  addrlib_libamdgpu_addrlib_la_CPPFLAGS = \
> $(DEFINES) \
> -I$(top_srcdir)/src/ \
> +   -I$(top_srcdir)/include \
> -I$(srcdir)/common \
> -I$(srcdir)/addrlib \
> -I$(srcdir)/addrlib/core \
> diff --git a/src/amd/addrlib/addrinterface.cpp 
> b/src/amd/addrlib/addrinterface.cpp
> index 112431e2cb..9cbeba41d9 100644
> --- a/src/amd/addrlib/addrinterface.cpp
> +++ b/src/amd/addrlib/addrinterface.cpp
> @@ -36,6 +36,8 @@
>
>  #include "addrcommon.h"
>
> +#include "util/macros.h"
> +
>  using namespace Addr;
>
>  
> 
> @@ -832,7 +834,7 @@ BOOL_32 ADDR_API ElemGetExportNorm(
>  Addr::Lib* pLib = Lib::GetLib(hLib);
>  BOOL_32 enabled = FALSE;
>
> -ADDR_E_RETURNCODE returnCode = ADDR_OK;
> +MAYBE_UNUSED ADDR_E_RETURNCODE returnCode = ADDR_OK;
>
>  if (pLib != NULL)
>  {
> --
> 2.18.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] util: mark s as MAYBE_UNUSED in _mesa_half_to_unorm8

2018-08-20 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek
On Fri, Aug 17, 2018 at 10:41 AM Kai Wasserbäch
 wrote:
>
> Only used, when asserts are enabled.
>
> Fixes an unused-variable warning with gcc-8:
>  ../../../src/util/half_float.c: In function '_mesa_half_to_unorm8':
>  ../../../src/util/half_float.c:189:14: warning: unused variable 's' 
> [-Wunused-variable]
>  const int s = (val >> 15) & 0x1;
>^
>
> Signed-off-by: Kai Wasserbäch 
> ---
>  src/util/half_float.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/util/half_float.c b/src/util/half_float.c
> index 2eff2c84f5..63aec5c5c1 100644
> --- a/src/util/half_float.c
> +++ b/src/util/half_float.c
> @@ -28,6 +28,7 @@
>  #include 
>  #include "half_float.h"
>  #include "rounding.h"
> +#include "macros.h"
>
>  typedef union { float f; int32_t i; uint32_t u; } fi_type;
>
> @@ -186,7 +187,7 @@ uint8_t _mesa_half_to_unorm8(uint16_t val)
>  {
> const int m = val & 0x3ff;
> const int e = (val >> 10) & 0x1f;
> -   const int s = (val >> 15) & 0x1;
> +   MAYBE_UNUSED const int s = (val >> 15) & 0x1;
>
> /* v = round_to_nearest(1.mm * 2^(e-15) * 255)
>  *   = round_to_nearest((1.mm * 255) * 2^(e-15))
> --
> 2.18.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2.1 6/11] nir: Add an array splitting pass

2018-08-20 Thread Caio Marcelo de Oliveira Filho
On Mon, Jul 30, 2018 at 09:16:42AM -0700, Jason Ekstrand wrote:
> This pass looks for array variables where at least one level of the
> array is never indirected and splits it into multiple smaller variables.
> 
> This pass doesn't really do much now because nir_lower_vars_to_ssa can
> already see through arrays of arrays and can detect indirects on just
> one level or even see that arr[i][0][5] does not alias arr[i][1][j].
> This pass exists to help other passes more easily see through arrays of
> arrays.  If a back-end does implement arrays using scratch or indirects
> on registers, having more smaller arrays is likely to have better memory
> efficiency.
> 
> v2 (Jason Ekstrand):
>  - Better comments and naming (some from Caio)
>  - Rework to use one hash map instead of two

The rework reads better.  Thanks.


> v2.1 (Jason Ekstrand):
>  - Fix a couple of bugs that were added in the rework including one
>which basically prevented it from running
> ---
>  src/compiler/nir/nir.h|   1 +
>  src/compiler/nir/nir_split_vars.c | 583 ++
>  2 files changed, 584 insertions(+)


This patch is

Reviewed-by: Caio Marcelo de Oliveira Filho 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] autotools: include git_sha1.h in dist tarball

2018-08-20 Thread Eric Engestrom
On Monday, 2018-08-20 14:23:28 +, Juan A. Suarez Romero wrote:
> This fixes `make distcheck`.
> 
> Fixes: 471f708ed6 ("git_sha1: simplify logic")
> CC: Eric Engestrom 
> ---
>  src/Makefile.am | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 396865cbe55..412510f435b 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -26,7 +26,7 @@ git_sha1.h:
>  
>  BUILT_SOURCES = git_sha1.h
>  CLEANFILES = $(BUILT_SOURCES)
> -EXTRA_DIST = git_sha1.h.in meson.build
> +EXTRA_DIST = git_sha1.h meson.build

Hmm, I need to double-check (tomorrow), but this feels wrong to me:
shouldn't we drop it from the EXTRA_DIST, so that it gets regenerated on
the system using the release tarball, instead of shipping the one built
from git?

I'm not that familiar with autotools' dist system, so I might be wrong.

>  
>  SUBDIRS = . gtest util mapi/glapi/gen mapi
>  
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 1/3] i965/miptree: Use miptree_map in map_blit functions

2018-08-20 Thread Nanley Chery
This struct contains all the data of interest. can_blit_slice() will use
it in the next patch to calculate the correct pitch.

Suggested-by: Chris Wilson 
Cc: 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index a18d5ac3624..b477c97e51d 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -3542,7 +3542,7 @@ intel_miptree_release_map(struct intel_mipmap_tree *mt,
 
 static bool
 can_blit_slice(struct intel_mipmap_tree *mt,
-   unsigned int level, unsigned int slice)
+   const struct intel_miptree_map *map)
 {
/* See intel_miptree_blit() for details on the 32k pitch limit. */
if (intel_miptree_blt_pitch(mt) >= 32768)
@@ -3554,9 +3554,7 @@ can_blit_slice(struct intel_mipmap_tree *mt,
 static bool
 use_intel_mipree_map_blit(struct brw_context *brw,
   struct intel_mipmap_tree *mt,
-  GLbitfield mode,
-  unsigned int level,
-  unsigned int slice)
+  const struct intel_miptree_map *map)
 {
const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
@@ -3564,19 +3562,19 @@ use_intel_mipree_map_blit(struct brw_context *brw,
   /* It's probably not worth swapping to the blit ring because of
* all the overhead involved.
*/
-   !(mode & GL_MAP_WRITE_BIT) &&
+   !(map->mode & GL_MAP_WRITE_BIT) &&
!mt->compressed &&
(mt->surf.tiling == ISL_TILING_X ||
 /* Prior to Sandybridge, the blitter can't handle Y tiling */
 (devinfo->gen >= 6 && mt->surf.tiling == ISL_TILING_Y0) ||
 /* Fast copy blit on skl+ supports all tiling formats. */
 devinfo->gen >= 9) &&
-   can_blit_slice(mt, level, slice))
+   can_blit_slice(mt, map))
   return true;
 
if (mt->surf.tiling != ISL_TILING_LINEAR &&
mt->bo->size >= brw->max_gtt_map_object_size) {
-  assert(can_blit_slice(mt, level, slice));
+  assert(can_blit_slice(mt, map));
   return true;
}
 
@@ -3625,7 +3623,7 @@ intel_miptree_map(struct brw_context *brw,
   intel_miptree_map_etc(brw, mt, map, level, slice);
} else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
   intel_miptree_map_depthstencil(brw, mt, map, level, slice);
-   } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
+   } else if (use_intel_mipree_map_blit(brw, mt, map)) {
   intel_miptree_map_blit(brw, mt, map, level, slice);
 #if defined(USE_SSE41)
} else if (!(mode & GL_MAP_WRITE_BIT) &&
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 2/3] i965/miptree: Fix can_blit_slice()

2018-08-20 Thread Nanley Chery
Check the destination's row pitch against the BLT engine's row pitch
limitation as well.

Fixes: 0288fe8d0417730bdd5b3477130dd1dc32bdbcd3
("i965/miptree: Use the correct BLT pitch")

v2: Fix the Fixes tag (Dylan).
Check the destination row pitch (Chris).

Cc: 
Reported-by: Dylan Baker 
---

I decided against using the mesa row pitch helper to keep the
dst_blt_pitch assignment on one line.

 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b477c97e51d..983f145afc9 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -3545,10 +3545,9 @@ can_blit_slice(struct intel_mipmap_tree *mt,
const struct intel_miptree_map *map)
 {
/* See intel_miptree_blit() for details on the 32k pitch limit. */
-   if (intel_miptree_blt_pitch(mt) >= 32768)
-  return false;
-
-   return true;
+   const unsigned src_blt_pitch = intel_miptree_blt_pitch(mt);
+   const unsigned dst_blt_pitch = ALIGN(map->w * mt->cpp, 64);
+   return src_blt_pitch < 32768 && dst_blt_pitch < 32768;
 }
 
 static bool
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 3/3] intel/isl: Avoid tiling some 16K-wide render targets

2018-08-20 Thread Nanley Chery
Fix rendering issues on BDW and SKL.

Fixes: 0288fe8d0417730bdd5b3477130dd1dc32bdbcd3
("i965/miptree: Use the correct BLT pitch")

Fixes the following regressions seen

exclusively on SKL:
* KHR-GL46.texture_barrier_ARB.disjoint-texels
* KHR-GL46.texture_barrier_ARB.overlapping-texels
* KHR-GL46.texture_barrier.disjoint-texels
* KHR-GL46.texture_barrier.overlapping-texels

and both on BDW and SKL:
* GTF-GL46.gtf21.GL2FixedTests.buffer_corners.buffer_corners
* GTF-GL46.gtf21.GL2FixedTests.stencil_plane_corners.stencil_plane_corners

v2: Note the fixed tests (Andres).
Don't cause failures with multisampled buffers (Andres).
Don't hamper SKL GT4 (Ken).
v3: Fix the Fixes tag (Dylan).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107359
Cc: 
---
 src/intel/isl/isl_gen7.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c
index 4fa9851233f..a9db21fba52 100644
--- a/src/intel/isl/isl_gen7.c
+++ b/src/intel/isl/isl_gen7.c
@@ -294,6 +294,29 @@ isl_gen6_filter_tiling(const struct isl_device *dev,
 */
if (ISL_DEV_GEN(dev) < 7 && isl_format_get_layout(info->format)->bpb >= 128)
   *flags &= ~ISL_TILING_Y0_BIT;
+
+   /* From the BDW and SKL PRMs, Volume 2d,
+* RENDER_SURFACE_STATE::Width - Programming Notes:
+*
+*   A known issue exists if a primitive is rendered to the first 2 rows and
+*   last 2 columns of a 16K width surface. If any geometry is drawn inside
+*   this square it will be copied to column X=2 and X=3 (arrangement on Y
+*   position will stay the same). If any geometry exceeds the boundaries of
+*   this 2x2 region it will be drawn normally. The issue also only occurs
+*   if the surface has TileMode != Linear.
+*
+* [Internal documentation notes that this issue isn't present on SKL GT4.]
+* To prevent this rendering corruption, only allow linear tiling for
+* surfaces with widths greater than 16K-2 pixels.
+*
+* TODO: Is this an issue for multisampled surfaces as well?
+*/
+   if (info->width > 16382 && info->samples == 1 &&
+   info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT &&
+   (ISL_DEV_GEN(dev) == 8 ||
+(dev->info->is_skylake && dev->info->gt != 4))) {
+  *flags &= ISL_TILING_LINEAR_BIT;
+   }
 }
 
 void
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] vulkan/wsi: fix pointer-integer conversion warnings

2018-08-20 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

Did you have access to push?

On Mon, Aug 20, 2018 at 11:40 PM, Grazvydas Ignotas  wrote:
> For 32bit build. Trivial.
> ---
>  src/vulkan/wsi/wsi_common_display.c | 4 ++--
>  src/vulkan/wsi/wsi_common_x11.c | 2 +-
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/vulkan/wsi/wsi_common_display.c 
> b/src/vulkan/wsi/wsi_common_display.c
> index e6cba188dfa..1e90bba460c 100644
> --- a/src/vulkan/wsi/wsi_common_display.c
> +++ b/src/vulkan/wsi/wsi_common_display.c
> @@ -1515,11 +1515,11 @@ wsi_register_vblank_event(struct wsi_display_fence 
> *fence,
> for (;;) {
>int ret = drmCrtcQueueSequence(wsi->fd, connector->crtc_id,
>   flags,
>   frame_requested,
>   frame_queued,
> - (uint64_t) fence);
> + (uintptr_t) fence);
>
>if (!ret)
>   return VK_SUCCESS;
>
>if (errno != ENOMEM) {
> @@ -2340,11 +2340,11 @@ wsi_get_randr_output_display(VkPhysicalDevice 
> physical_device,
>wsi_display_get_output(wsi_device, connection, (xcb_randr_output_t) 
> output);
>
> if (connector)
>*display = wsi_display_connector_to_handle(connector);
> else
> -  *display = NULL;
> +  *display = VK_NULL_HANDLE;
> return VK_SUCCESS;
>  }
>
>  #endif
>
> diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
> index 7b930884b47..aaa4d1e658e 100644
> --- a/src/vulkan/wsi/wsi_common_x11.c
> +++ b/src/vulkan/wsi/wsi_common_x11.c
> @@ -1323,11 +1323,11 @@ x11_surface_create_swapchain(VkIcdSurfaceBase 
> *icd_surface,
>  * last completion mode, to ensure we don't get into reallocation
>  * cycles. If we are starting anew, we set 'COPY', as that is the only
>  * mode which provokes reallocation when anything changes, to make
>  * sure we have the most optimal allocation.
>  */
> -   struct x11_swapchain *old_chain = (void *) pCreateInfo->oldSwapchain;
> +   struct x11_swapchain *old_chain = (void *)(intptr_t) 
> pCreateInfo->oldSwapchain;
> if (old_chain)
>chain->last_present_mode = old_chain->last_present_mode;
> else
>chain->last_present_mode = XCB_PRESENT_COMPLETE_MODE_COPY;
>
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] radv: use different builtin shader cache for 32bit

2018-08-20 Thread Bas Nieuwenhuizen
On Mon, Aug 20, 2018 at 11:32 PM, Grazvydas Ignotas  wrote:
> Currently if 64bit and 32bit programs are used interchangeably, radv
> will keep overwriting the cache. Use separate cache files to avoid
> that.

I probably should also split this out per GPU for people who have
different GPUs ...

For now, this series is

Reviewed-by: Bas Nieuwenhuizen 

> ---
>  src/amd/vulkan/radv_meta.c | 16 +++-
>  1 file changed, 7 insertions(+), 9 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
> index b8d0a7cfc70..1ec8896afa2 100644
> --- a/src/amd/vulkan/radv_meta.c
> +++ b/src/amd/vulkan/radv_meta.c
> @@ -232,31 +232,29 @@ radv_builtin_cache_path(char *path)
> char *xdg_cache_home = getenv("XDG_CACHE_HOME");
> const char *suffix = "/radv_builtin_shaders";
> const char *suffix2 = "/.cache/radv_builtin_shaders";
> struct passwd pwd, *result;
> char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max,but suffices 
> here. */
> +   int ret;
>
> if (xdg_cache_home) {
> -
> -   if (strlen(xdg_cache_home) + strlen(suffix) > PATH_MAX)
> -   return false;
> -
> -   strcpy(path, xdg_cache_home);
> -   strcat(path, suffix);
> -   return true;
> +   ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
> +  xdg_cache_home, suffix, sizeof(void *) * 8);
> +   return ret > 0 && ret < PATH_MAX + 1;
> }
>
> getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), 
> &result);
> if (!result)
> return false;
>
> strcpy(path, pwd.pw_dir);
> strcat(path, "/.cache");
> mkdir(path, 0755);
>
> -   strcat(path, suffix);
> -   return true;
> +   ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
> +  pwd.pw_dir, suffix2, sizeof(void *) * 8);
> +   return ret > 0 && ret < PATH_MAX + 1;
>  }
>
>  static bool
>  radv_load_meta_pipeline(struct radv_device *device)
>  {
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/13] glsl: Add built-in functions for NV_shader_atomic_float

2018-08-20 Thread Ian Romanick
On 08/20/2018 12:10 PM, Caio Marcelo de Oliveira Filho wrote:
> Hi,
> 
>> @@ -1133,6 +1159,9 @@ builtin_builder::create_intrinsics()
>>  _atomic_intrinsic2(buffer_atomics_supported,
>> glsl_type::int_type,
>> ir_intrinsic_generic_atomic_add),
>> +_atomic_intrinsic2(NV_shader_atomic_float_supported,
>> +   glsl_type::float_type,
>> +   ir_intrinsic_generic_atomic_add),
>>  
>> _atomic_counter_intrinsic1(shader_atomic_counter_ops_or_v460_desktop,
>> ir_intrinsic_atomic_counter_add),
>>  NULL);
> 
> (...)
> 
>> @@ -3185,6 +3220,9 @@ builtin_builder::create_builtins()
>>  _atomic_op2("__intrinsic_atomic_exchange",
>>  buffer_atomics_supported,
>>  glsl_type::int_type),
>> +_atomic_op2("__intrinsic_atomic_exchange",
>> +shader_atomic_float_exchange,
>> +glsl_type::float_type),
>>  NULL);
>> add_function("atomicCompSwap",
>>  _atomic_op3("__intrinsic_atomic_comp_swap",
> 
> Question: why do some builtins care about "supported" while others care
> about the extension being "enabled"?

There are actually two different things happening.  In the cases where
we only care about the extension being supported, we're creating a
hidden intrinsic function.  These are used internally by the compiler to
implement various features.  In the cases where we care about the
extension being enabled, we're creating the user-visible function.
Usually the user-visible function is implemented by calling the
intrinsic.  This two level approach is used so that we can more cleanly
support cases where multiple extensions have functions with slightly
different names that do the same thing.  atomicCounterMaxARB and
atomicCounterMax, for example, are both implemented using
__intrinsic_atomic_max.

> Thanks,
> Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC][PATCH 0/5] mesa: Add types for AMD_depth_clamp_separate.

2018-08-20 Thread Sagar Ghuge
Thank you for volunteering to test my branch. But before I point you to the 
branch,
I will rework patches according to your comment on patch 3.

Again thanks a lot for your and Ian's input. 

- Sagar  

On 08/20/2018 04:21 PM, Marek Olšák wrote:
> I can try to test the extension with the radeonsi driver. Do you have
> a Mesa branch with the final patches?
> 
> Marek
> 
> On Mon, Aug 13, 2018 at 5:35 PM Sagar Ghuge  wrote:
>>
>> Hi everyone,
>>
>> I am kind of stuck on this part actually. I don't have
>> latest AMD graphics card to test following behavior which
>> Ian and Marek suggested me.
>>
>> I have written a piglit test :
>> https://gitlab.freedesktop.org/sagarghuge/piglit/blob/320b91ffb131b380f1d27d9c05ab141e0cd9e557/tests/spec/amd_depth_clamp_separate/depth_clamp_get_test.c
>>
>> It would be great if someone can help me or test it in their
>> spare time on latest AMD graphics card and provide some input
>> on the extension behavior on AMD's closed source driver.
>>
>>
>> On 08/09/2018 01:11 PM, Marek Olšák wrote:
>>> On Thu, Aug 2, 2018 at 2:44 PM, Ian Romanick  wrote:
 On 08/02/2018 11:30 AM, Ian Romanick wrote:
> On 08/01/2018 08:31 PM, Sagar Ghuge wrote:
>> Add some basic types and storage for the
>> AMD_depth_clamp_separate extension.

 I mentioned this on patch 5, but you should word wrap the commit message
 to 70 or 72 columns.

 More substantive comments are below...

>> Signed-off-by: Sagar Ghuge 
>> ---
>>  include/GL/glcorearb.h   | 2 ++
>>  src/mesa/main/extensions_table.h | 1 +
>>  src/mesa/main/mtypes.h   | 9 +
>>  3 files changed, 12 insertions(+)
>>
>> diff --git a/include/GL/glcorearb.h b/include/GL/glcorearb.h
>> index a78bbb6e18..d73ca5a8df 100644
>> --- a/include/GL/glcorearb.h
>> +++ b/include/GL/glcorearb.h
>> @@ -1558,6 +1558,8 @@ typedef int64_t GLint64;
>>  #define GL_MAX_FRAGMENT_INPUT_COMPONENTS  0x9125
>>  #define GL_CONTEXT_PROFILE_MASK   0x9126
>>  #define GL_DEPTH_CLAMP0x864F
>> +#define GL_DEPTH_CLAMP_NEAR_AMD   0x901E
>> +#define GL_DEPTH_CLAMP_FAR_AMD0x901F
>>  #define GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION 0x8E4C
>>  #define GL_FIRST_VERTEX_CONVENTION0x8E4D
>>  #define GL_LAST_VERTEX_CONVENTION 0x8E4E
>
> We should just import the updated versions of the Khronos headers.  I
> think Marek sent out a patch to do this.  Does that work?
>
>> diff --git a/src/mesa/main/extensions_table.h 
>> b/src/mesa/main/extensions_table.h
>> index 3f01896cae..8dc668e087 100644
>> --- a/src/mesa/main/extensions_table.h
>> +++ b/src/mesa/main/extensions_table.h
>> @@ -9,6 +9,7 @@
>>  EXT(3DFX_texture_compression_FXT1   , 
>> TDFX_texture_compression_FXT1  , GLL, GLC,  x ,  x , 1999)
>>
>>  EXT(AMD_conservative_depth  , ARB_conservative_depth
>>  , GLL, GLC,  x ,  x , 2009)
>> +EXT(AMD_depth_clamp_separate, AMD_depth_clamp_separate  
>>  ,  x , GLC,  x ,  x , 2009)
>>  EXT(AMD_draw_buffers_blend  , ARB_draw_buffers_blend
>>  , GLL, GLC,  x ,  x , 2009)
>>  EXT(AMD_performance_monitor , AMD_performance_monitor   
>>  , GLL, GLC,  x , ES2, 2007)
>>  EXT(AMD_pinned_memory   , AMD_pinned_memory 
>>  , GLL, GLC,  x ,  x , 2013)
>> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
>> index d71872835d..406746a84c 100644
>> --- a/src/mesa/main/mtypes.h
>> +++ b/src/mesa/main/mtypes.h
>> @@ -1280,6 +1280,8 @@ struct gl_transform_attrib
>> GLboolean RescaleNormals;/**< 
>> GL_EXT_rescale_normal */
>> GLboolean RasterPositionUnclipped;   /**< 
>> GL_IBM_rasterpos_clip */
>> GLboolean DepthClamp;/**< GL_ARB_depth_clamp */
>> +   GLboolean DepthClampNear;/**< 
>> GL_AMD_depth_clamp_separate */
>> +   GLboolean DepthClampFar; /**< 
>> GL_AMD_depth_clamp_separate */

 I think we actually need two more flags here: _DepthClampNear and
 _DepthClampFar.  The spec is a little unclear, so you may need to test
 on some AMD closed-source drivers.  Specifically, the spec says

 "In addition to DEPTH_CLAMP_NEAR_AMD and DEPTH_CLAMP_FAR_AMD, the
 token DEPTH_CLAMP may be used to simultaneously enable or disable
 depth clamping at both the near and far planes."

 Based on that, I'm not sure what you're supposed to get if you do:

 glDisable(GL_DEPTH_CLAMP_NEAR_AMD);
 glEnable(GL_DEPTH_CLAMP);
 glGetIntegerv(GL_DEPTH_CLAMP_NEAR_AMD, &v);

 Should v contain GL_TRUE or GL_FALSE?  It seems 

Re: [Mesa-dev] [PATCH 05/12] amd/addrlib: mark physicalSliceSize as MAYBE_UNUSED in Addr::V1::EgBasedLib::HwlGetSizeAdjustmentMicroTiled

2018-08-20 Thread Marek Olšák
I've sent comments on patches 3 & 4. With those addressed, patches 1-5 are:

Reviewed-by: Marek Olšák 
On Sat, Aug 18, 2018 at 7:16 AM Kai Wasserbäch
 wrote:
>
> Only used, when asserts are enabled.
>
> Fixes an unused-but-set-variable warning with GCC 8:
>  ../../../src/amd/addrlib/r800/egbaddrlib.cpp: In member function 'virtual 
> long long unsigned int 
> Addr::V1::EgBasedLib::HwlGetSizeAdjustmentMicroTiled(unsigned int, unsigned 
> int, ADDR_SURFACE_FLAGS, unsigned int, unsigned int, unsigned int, unsigned 
> int*, unsigned int*) const':
>  ../../../src/amd/addrlib/r800/egbaddrlib.cpp:4111:13: warning: variable 
> 'physicalSliceSize' set but not used [-Wunused-but-set-variable]
>   UINT_64 physicalSliceSize;
>   ^
>
> Cc: Marek Olšák 
> Signed-off-by: Kai Wasserbäch 
> ---
>  src/amd/addrlib/r800/egbaddrlib.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp 
> b/src/amd/addrlib/r800/egbaddrlib.cpp
> index 37a60c7a85..edc7a13070 100644
> --- a/src/amd/addrlib/r800/egbaddrlib.cpp
> +++ b/src/amd/addrlib/r800/egbaddrlib.cpp
> @@ -4110,7 +4110,7 @@ UINT_64 EgBasedLib::HwlGetSizeAdjustmentMicroTiled(
>  ) const
>  {
>  UINT_64 logicalSliceSize;
> -UINT_64 physicalSliceSize;
> +MAYBE_UNUSED UINT_64 physicalSliceSize;
>
>  UINT_32 pitch   = *pPitch;
>  UINT_32 height  = *pHeight;
> --
> 2.18.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] radv: use different builtin shader cache for 32bit

2018-08-20 Thread Grazvydas Ignotas
Currently if 64bit and 32bit programs are used interchangeably, radv
will keep overwriting the cache. Use separate cache files to avoid
that.
---
 src/amd/vulkan/radv_meta.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
index b8d0a7cfc70..1ec8896afa2 100644
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -232,31 +232,29 @@ radv_builtin_cache_path(char *path)
char *xdg_cache_home = getenv("XDG_CACHE_HOME");
const char *suffix = "/radv_builtin_shaders";
const char *suffix2 = "/.cache/radv_builtin_shaders";
struct passwd pwd, *result;
char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max,but suffices 
here. */
+   int ret;
 
if (xdg_cache_home) {
-
-   if (strlen(xdg_cache_home) + strlen(suffix) > PATH_MAX)
-   return false;
-
-   strcpy(path, xdg_cache_home);
-   strcat(path, suffix);
-   return true;
+   ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
+  xdg_cache_home, suffix, sizeof(void *) * 8);
+   return ret > 0 && ret < PATH_MAX + 1;
}
 
getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), &result);
if (!result)
return false;
 
strcpy(path, pwd.pw_dir);
strcat(path, "/.cache");
mkdir(path, 0755);
 
-   strcat(path, suffix);
-   return true;
+   ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
+  pwd.pw_dir, suffix2, sizeof(void *) * 8);
+   return ret > 0 && ret < PATH_MAX + 1;
 }
 
 static bool
 radv_load_meta_pipeline(struct radv_device *device)
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] llvmpipe: add cc clobber to inline asm

2018-08-20 Thread Grazvydas Ignotas
The bsr instruction modifies flags, so that needs to be indicated to the
compiler. No effect on generated code, but still needed for correctness.
---
 src/gallium/drivers/llvmpipe/lp_setup_tri.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index cec6198ec63..1852ec05d56 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -732,11 +732,12 @@ floor_pot(uint32_t n)
if (n == 0)
   return 0;
 
__asm__("bsr %1,%0"
   : "=r" (n)
-  : "rm" (n));
+  : "rm" (n)
+  : "cc");
return 1 << n;
 #else
n |= (n >>  1);
n |= (n >>  2);
n |= (n >>  4);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radv: place pointer length into cache uuid

2018-08-20 Thread Grazvydas Ignotas
Thanks to reproducible builds, binary file timestamps may be identical
for both 32bit and 64bit packages when built from the same source.
This means radv will use the same cache for both 32 and 64 bit
processes, which leads to crashes.

Conveniently there is a spare byte in cache_uuid, let's place the
pointer size there.

Fixes: f4e499ec79 "radv: add initial non-conformant radv vulkan driver"
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107601
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105904
---
 src/amd/vulkan/radv_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index cc88abb57a8..79dbbd886d5 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -58,11 +58,11 @@ radv_device_get_cache_uuid(enum radeon_family family, void 
*uuid)
return -1;
 
memcpy(uuid, &mesa_timestamp, 4);
memcpy((char*)uuid + 4, &llvm_timestamp, 4);
memcpy((char*)uuid + 8, &f, 2);
-   snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
+   snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv%zd", sizeof(void 
*));
return 0;
 }
 
 static void
 radv_get_driver_uuid(void *uuid)
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl/linker: Allow unused in blocks which are not declated on previous stage

2018-08-20 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek
On Mon, Aug 20, 2018 at 9:32 AM vadym.shovkoplias
 wrote:
>
> From Section 4.3.4 (Inputs) of the GLSL 1.50 spec:
>
> "Only the input variables that are actually read need to be written
>  by the previous stage; it is allowed to have superfluous
>  declarations of input variables."
>
> Fixes:
> * interstage-multiple-shader-objects.shader_test
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101247
> Signed-off-by: Vadym Shovkoplias 
> ---
>  src/compiler/glsl/link_interface_blocks.cpp | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/glsl/link_interface_blocks.cpp 
> b/src/compiler/glsl/link_interface_blocks.cpp
> index e5eca9460e..801fbcd5d9 100644
> --- a/src/compiler/glsl/link_interface_blocks.cpp
> +++ b/src/compiler/glsl/link_interface_blocks.cpp
> @@ -417,9 +417,15 @@ validate_interstage_inout_blocks(struct 
> gl_shader_program *prog,
> * write to any of the pre-defined outputs (e.g. if the vertex shader
> * does not write to gl_Position, etc), which is allowed and results in
> * undefined behavior.
> +   *
> +   * From Section 4.3.4 (Inputs) of the GLSL 1.50 spec:
> +   *
> +   *"Only the input variables that are actually read need to be 
> written
> +   * by the previous stage; it is allowed to have superfluous
> +   * declarations of input variables."
> */
>if (producer_def == NULL &&
> -  !is_builtin_gl_in_block(var, consumer->Stage)) {
> +  !is_builtin_gl_in_block(var, consumer->Stage) && var->data.used) {
>   linker_error(prog, "Input block `%s' is not an output of "
>"the previous stage\n", 
> var->get_interface_type()->name);
>   return;
> --
> 2.18.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Detect VSX separately from Altivec

2018-08-20 Thread Vicki Pfau
I was mostly following what was done earlier in the file for Altivec. I 
can move it but then ideally the Altivec check should also be moved.



Vicki


On 08/20/2018 08:53 AM, Roland Scheidegger wrote:

u_cpu_detect should detect what's really available, not what is used
(though indeed we actually disable u_cpu bits explicitly in gallivm for
some sse features, but this is a hack).
So I think it would be better if u_cpu_detect sets the has_vsx bit
regardless of what the env var is and then enable it based on this bit and
the env var.
Otherwise looks good to me.

Roland

Am 19.08.2018 um 23:17 schrieb Vicki Pfau:

Previously gallivm would attempt to use VSX instructions on all systems
where it detected that Altivec is supported; however, VSX was added to
POWER long after Altivec, causing lots of crashes on older POWER/PPC
hardware, e.g. PPC Macs. By detecting VSX separately from Altivec we can
automatically disable it on hardware that supports Altivec but not VSX

Signed-off-by: Vicki Pfau 
---
  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 21 +++
  src/gallium/auxiliary/util/u_cpu_detect.c | 14 -
  src/gallium/auxiliary/util/u_cpu_detect.h |  1 +
  3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 79dbedbb56..fcbdd5050f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -650,26 +650,11 @@ 
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
  * which are fixed in LLVM 4.0.
  *
  * With LLVM 4.0 or higher:
-* Make sure VSX instructions are ENABLED, unless
-* a) the entire -mattr option is overridden via GALLIVM_MATTRS, or
-* b) VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 
0.
+* Make sure VSX instructions are ENABLED (if supported), unless
+* VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0.
  */
 if (util_cpu_caps.has_altivec) {
-  char *env_mattrs = getenv("GALLIVM_MATTRS");
-  if (env_mattrs) {
- MAttrs.push_back(env_mattrs);
-  }
-  else {
- boolean enable_vsx = true;
- char *env_vsx = getenv("GALLIVM_VSX");
- if (env_vsx && env_vsx[0] == '0') {
-enable_vsx = false;
- }
- if (enable_vsx)
-MAttrs.push_back("+vsx");
- else
-MAttrs.push_back("-vsx");
-  }
+  MAttrs.push_back(util_cpu_caps.has_vsx ? "+vsx" : "-vsx");
 }
  #endif
  #endif
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c 
b/src/gallium/auxiliary/util/u_cpu_detect.c
index 3c6ae4ea1a..14003aa769 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -133,6 +133,7 @@ check_os_altivec_support(void)
signal(SIGILL, SIG_DFL);
 } else {
boolean enable_altivec = TRUE;/* Default: enable  if available, and 
if not overridden */
+  boolean enable_vsx = TRUE;
  #ifdef DEBUG
/* Disabling Altivec code generation is not the same as disabling VSX 
code generation,
 * which can be done simply by passing -mattr=-vsx to the LLVM 
compiler; cf.
@@ -144,6 +145,11 @@ check_os_altivec_support(void)
   enable_altivec = FALSE;
}
  #endif
+  /* VSX instructions can be explicitly enabled/disabled via GALLIVM_VSX=1 
or 0 */
+  char *env_vsx = getenv("GALLIVM_VSX");
+  if (env_vsx && env_vsx[0] == '0') {
+ enable_vsx = FALSE;
+  }
if (enable_altivec) {
   __lv_powerpc_canjump = 1;
  
@@ -153,8 +159,13 @@ check_os_altivec_support(void)

   :
   : "r" (-1));
  
- signal(SIGILL, SIG_DFL);

   util_cpu_caps.has_altivec = 1;
+
+ if (enable_vsx) {
+__asm __volatile("xxland %vs0, %vs0, %vs0");
+util_cpu_caps.has_vsx = 1;
+ }
+ signal(SIGILL, SIG_DFL);
} else {
   util_cpu_caps.has_altivec = 0;
}
@@ -536,6 +547,7 @@ util_cpu_detect(void)
debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", 
util_cpu_caps.has_3dnow_ext);
debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop);
debug_printf("util_cpu_caps.has_altivec = %u\n", 
util_cpu_caps.has_altivec);
+  debug_printf("util_cpu_caps.has_vsx = %u\n", util_cpu_caps.has_vsx);
debug_printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon);
debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
debug_printf("util_cpu_caps.has_avx512f = %u\n", 
util_cpu_caps.has_avx512f);
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h 
b/src/gallium/auxiliary/util/u_cpu_detect.h
index 7a63d55028..19f5567ca7 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -71,6 +71,7 @@ struct util_cpu_caps {
 unsigned 

Re: [Mesa-dev] [PATCH 00/13] Implement INTEL_shader_atomic_float_minmax

2018-08-20 Thread Caio Marcelo de Oliveira Filho
> The code is here:
> 
> 
> https://cgit.freedesktop.org/~idr/mesa/log/?h=INTEL_shader_atomic_float_minmax

The series is

Reviewed-by: Caio Marcelo de Oliveira Filho 


I'd consider adding some clarification about not quietizing sNaN
and/or relaxing the definition in the spec (patch 3).



> and the tests are here:
> 
> 
> https://cgit.freedesktop.org/~idr/piglit/log/?h=INTEL_shader_atomic_float_minmax

The series (7 patches prefixed with intel_...) is

Reviewed-by: Caio Marcelo de Oliveira Filho 

given comments below for individual patches are addressed

Patch 2: typo "bit" instead of "but" in the bootstrapping comment
block.

Patch 6: the test calls floatBitsToUint(atomicCompSwap(...)), but the
result of the conversion is not used, maybe take it off.

Patch 7: typos in the commit message "quite NaN" ("quiet NaN") and
"cognitave" (instead of "cognitive"). Would consider early return if
already_set_color is set.



Thanks,
Caio


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] compiler: add SYSTEM_VALUE_VARYING_COORD

2018-08-20 Thread Rob Clark
On Mon, Aug 20, 2018 at 7:20 PM Marek Olšák  wrote:
>
> On Mon, Aug 20, 2018 at 7:06 PM Rob Clark  wrote:
> >
> > On Mon, Aug 20, 2018 at 6:54 PM Bas Nieuwenhuizen
> >  wrote:
> > >
> > > On Tue, Aug 21, 2018 at 12:38 AM, Marek Olšák  wrote:
> > > > On Fri, Aug 10, 2018 at 9:26 AM Rob Clark  wrote:
> > > >>
> > > >> Used internally in freedreno/ir3 for the vec2 value that hw passes to
> > > >> shader to use as coordinate for bary.f (varying fetch) instruction.
> > > >> This is not the same as SYSTEM_VALUE_FRAG_COORD.
> > > >>
> > > >> Signed-off-by: Rob Clark 
> > > >> ---
> > > >> Up until now, we'd been hard-coding the location of this value (ie. to
> > > >> r0.xy), mostly because originally in the early a3xx days I didn't know
> > > >> which bits could configure this value (blob was always using r0.xy so
> > > >> in cmdstream traces it always showed up as 0's).
> > > >>
> > > >> But starting with a6xx, the address register aliases r0.x, which kinda
> > > >> throws a monkey-wrench in the existing scheme of hard-coding.  The good
> > > >> news is that I know the bits to configure this value for a3xx-a6xx.
> > > >>
> > > >> So I'm shifting over to handling this like a sysval.
> > > >>
> > > >>  src/compiler/shader_enums.c| 1 +
> > > >>  src/compiler/shader_enums.h| 6 ++
> > > >>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
> > > >>  3 files changed, 8 insertions(+)
> > > >>
> > > >> diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
> > > >> index a874083a0b7..0210b503d3f 100644
> > > >> --- a/src/compiler/shader_enums.c
> > > >> +++ b/src/compiler/shader_enums.c
> > > >> @@ -244,6 +244,7 @@ gl_system_value_name(gl_system_value sysval)
> > > >>   ENUM(SYSTEM_VALUE_DEVICE_INDEX),
> > > >>   ENUM(SYSTEM_VALUE_VIEW_INDEX),
> > > >>   ENUM(SYSTEM_VALUE_VERTEX_CNT),
> > > >> + ENUM(SYSTEM_VALUE_VARYING_COORD),
> > > >> };
> > > >> STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
> > > >> return NAME(sysval);
> > > >> diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
> > > >> index f8e22925f35..5c36f55283c 100644
> > > >> --- a/src/compiler/shader_enums.h
> > > >> +++ b/src/compiler/shader_enums.h
> > > >> @@ -601,6 +601,12 @@ typedef enum
> > > >>  */
> > > >> SYSTEM_VALUE_VERTEX_CNT,
> > > >>
> > > >> +   /**
> > > >> +* Driver internal varying-coord, used for varying-fetch 
> > > >> instructions.
> > > >> +* Not externally visible.
> > > >> +*/
> > > >
> > > > Can you improve the documentation, so that mere mortals understand
> > > > what it means? Does it correspond to something in AMD hw?
> > >
> > > I'd expect this to correspond to stuff like PERSP_CENTER on AMD?
> > >
> > > Which begs the question, does this need distinguishing on
> > > center/centroid/per-sample?
> >
> > so, adreno, being gles hw, has just "smooth" and "flat" varyings..
> >
> > that said, I've been kinda thinking of this as an opaque driver
> > specific sysval without paying too much attention to what the value
> > actually is other than "the thing you pass to bary.f to get non-flat
> > varyings"..  I confess to not having looked into how this works on
> > other hw..
>
> Given what you said, the variable contains barycentric coordinates
> (i,j) for perspective interpolation at the pixel center. It's the same
> as "vec2 gl_BaryCoordSmoothAMD;" from
> GL_AMD_shader_explicit_vertex_parameter.
>

That seems like a reasonable assumption.. I suppose if there is
mesa+piglit support for gl_BaryCoordSmoothAMD I could try wiring it up
to prove that theory..

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 05/11] nir: Add a structure splitting pass

2018-08-20 Thread Caio Marcelo de Oliveira Filho
On Sat, Jul 28, 2018 at 10:44:36PM -0700, Jason Ekstrand wrote:
> This pass doesn't really do much now because nir_lower_vars_to_ssa can
> already see through structures and considers them to be "split".  This
> pass exists to help other passes more easily see through structure
> variables.  If a back-end does implement arrays using scratch or
> indirects on registers, having more smaller arrays is likely to have
> better memory efficiency.
> ---
>  src/compiler/Makefile.sources |   1 +
>  src/compiler/nir/meson.build  |   1 +
>  src/compiler/nir/nir.h|   1 +
>  src/compiler/nir/nir_split_vars.c | 271 ++
>  4 files changed, 274 insertions(+)
>  create mode 100644 src/compiler/nir/nir_split_vars.c

With the fix below, this patch is

Reviewed-by: Caio Marcelo de Oliveira Filho 


> +static void
> +init_field_for_type(struct field *field, struct field *parent,
> +const struct glsl_type *type,
> +const char *name,
> +struct split_var_state *state)
> +{
> +   *field = (struct field) {
> +  .parent = parent,
> +  .type = type,
> +   };
> +
> +   const struct glsl_type *struct_type = glsl_without_array(type);
> +   if (glsl_type_is_struct(struct_type)) {
> +  field->num_fields = glsl_get_length(struct_type),
> +  field->fields = ralloc_array(state->mem_ctx, struct field,
> +   field->num_fields);
> +  for (unsigned i = 0; i < field->num_fields; i++) {
> + char *field_name = NULL;
> + if (name) {
> +ralloc_asprintf(state->mem_ctx, "%s_%s", name,
> +glsl_get_struct_elem_name(struct_type, i));

Store the result of asprintf:

field_name = ralloc_asprintf(...);

And from previous review:

Maybe if no name for the parent is available, use something in
this place ("unnamed", or whatever).  That way the rest of the
hierarchy doesn't lose the meaning completely.



Thanks,
Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] gallium/format: Add a helper to combine separate Z24 and S8 stencil.

2018-08-20 Thread Eric Anholt
Kenneth Graunke  writes:

> This new function takes separate Z24 depth and S8 stencil sources,
> and packs them into a single combined Z24S8 buffer.
> ---
>  src/gallium/auxiliary/util/u_format_zs.c | 20 
>  src/gallium/auxiliary/util/u_format_zs.h |  2 ++
>  2 files changed, 22 insertions(+)
>
> diff --git a/src/gallium/auxiliary/util/u_format_zs.c 
> b/src/gallium/auxiliary/util/u_format_zs.c
> index 69f2f2971f7..4138fbe6503 100644
> --- a/src/gallium/auxiliary/util/u_format_zs.c
> +++ b/src/gallium/auxiliary/util/u_format_zs.c
> @@ -448,6 +448,26 @@ util_format_z24_unorm_s8_uint_pack_s_8uint(uint8_t 
> *dst_row, unsigned dst_stride
> }
>  }
>  
> +void
> +util_format_z24_unorm_s8_uint_pack_separate(uint8_t *dst_row, unsigned 
> dst_stride,
> +const uint32_t *z_src_row, 
> unsigned z_src_stride,
> +const uint8_t *s_src_row, 
> unsigned s_src_stride,
> +unsigned width, unsigned height)
> +{
> +   unsigned x, y;
> +   for(y = 0; y < height; ++y) {
> +  const uint32_t *z_src = z_src_row;
> +  const uint8_t *s_src = s_src_row;
> +  uint32_t *dst = (uint32_t *)dst_row;
> +  for(x = 0; x < width; ++x) {
> + *dst++ = (*z_src++ & 0x00ff) | (*s_src++ << 24);
> +  }
> +  dst_row += dst_stride/sizeof(*dst_row);
> +  z_src_row += z_src_stride/sizeof(*z_src_row);
> +  s_src_row += s_src_stride/sizeof(*s_src_row);
> +   }
> +}

Missing spaces after `for` and around binary operators.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/eu: print bytes instead of 32 bit hex value

2018-08-20 Thread Sagar Ghuge


On 08/20/2018 11:06 AM, Matt Turner wrote:
> Cool. This looks pretty good to me. A few comments inline.
> 
> On Wed, Aug 15, 2018 at 2:00 PM Sagar Ghuge  wrote:
>>
>> INTEL_DEBUG=hex prints 32 bit hex value
>> and due to endianness of CPU byte order is
>> reversed. In order to disassemble binary
>> files, print each byte instead of 32 bit hex
>> value.
> 
> Let's get your editor configured to line wrap at the correct length
> (these lines are too short).
> 
> If you use vim, you should be able to automatically line wrap to the
> appropriate length by highlighting the lines and then giving the
> command 'gq'
> 
>> Signed-off-by: Sagar Ghuge 
>> ---
>>  src/intel/compiler/brw_eu.c | 24 
>>  1 file changed, 16 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c
>> index 6ef0a6a577..223e561dff 100644
>> --- a/src/intel/compiler/brw_eu.c
>> +++ b/src/intel/compiler/brw_eu.c
>> @@ -365,9 +365,14 @@ brw_disassemble(const struct gen_device_info *devinfo,
>>if (compacted) {
>>   brw_compact_inst *compacted = (void *)insn;
>>  if (dump_hex) {
>> -   fprintf(out, "0x%08x 0x%08x   ",
>> -   ((uint32_t *)insn)[1],
>> -   ((uint32_t *)insn)[0]);
>> +   unsigned char * insn_ptr = ((unsigned char *)[0]);
>> +   for (int i = 0 ; i < 8; i = i + 4) {
>> +  fprintf(out, "%02x %02x %02x %02x ",
>> +  insn_ptr[i],
>> +  insn_ptr[i + 1],
>> +  insn_ptr[i + 2],
>> +  insn_ptr[i + 3]);
>> +   }
> 
> I like printing the spaces between the bytes. That really shows more
> clearly that this is a byte array and not subject to any endianness
> issues.
> 
> One suggestion: let's print some blank spaces after the compacted
> instruction hex so that the disassembled instruction vertically aligns
> with uncompacted instructions. Currently we get disassembly that looks
> like
>

Thanks for reviewing the patch. Yes, I made changes and sent v2 according to
your suggestions.
 
> 01 0b 1d 20 00 7c 02 00 mov(8)  g124<1>Fg2.3<0,1,0>F
> 01 00 60 00 e8 3a a0 2f 5c 00 00 00 00 00 00 00 mov(8)
> g125<1>Fg2.7<0,1,0>F
> 
> Also, we don't use tabs in i965. When editing old lines that had tabs,
> let's take the opportunity to remove them.
> 
> My ~/.vimrc has
> 
> autocmd BufNewFile,BufRead /home/mattst88/projects/mesa/* set
> expandtab tabstop=8 softtabstop=3 shiftwidth=3
> autocmd BufNewFile,BufRead
> /home/mattst88/projects/mesa/src/glsl/glcpp/* set noexpandtab
> tabstop=8 softtabstop=8 shiftwidth=8
> autocmd BufNewFile,BufRead
> /home/mattst88/projects/mesa/src/glsl/glsl_parser.yy set noexpandtab
> tabstop=8 shiftwidth=8
> autocmd BufNewFile,BufRead /home/mattst88/projects/piglit/* set
> noexpandtab tabstop=8 softtabstop=8 shiftwidth=8
> 
> to configure it appropriately for my Mesa and piglit directories.
> 

Thanks for sharing your vimrc. I think my struggle with getting the coding
style correct ends here :)

> With those couple of small nits fixed, this will earn my review.
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/tools: new i965_disasm tool

2018-08-20 Thread Sagar Ghuge
Thanks for reviewing the patch. I will make changes and send v2 accordingly. 


On 08/20/2018 11:34 AM, Matt Turner wrote:
> On Thu, Aug 16, 2018 at 1:51 PM Sagar Ghuge  wrote:
>>
>> Adds a new i965 instruction disassemble tool
> 
> This looks very good. A few comments about the structure inline.
> 
>> Signed-off-by: Sagar Ghuge 
>> ---
>>  src/intel/Makefile.tools.am   |  15 +++
>>  src/intel/tools/i965_disasm.c | 202 ++
>>  src/intel/tools/i965_disasm.h |  46 
>>  src/intel/tools/meson.build   |  11 ++
>>  4 files changed, 274 insertions(+)
>>  create mode 100644 src/intel/tools/i965_disasm.c
>>  create mode 100644 src/intel/tools/i965_disasm.h
>>
>> diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
>> index 00624084e6..36a3a70a28 100644
>> --- a/src/intel/Makefile.tools.am
>> +++ b/src/intel/Makefile.tools.am
>> @@ -22,6 +22,7 @@
>>  noinst_PROGRAMS += \
>> tools/aubinator \
>> tools/aubinator_error_decode \
>> +   tools/i965_disasm \
>> tools/error2aub
>>
>>
>> @@ -62,6 +63,20 @@ tools_aubinator_error_decode_CFLAGS = \
>> $(AM_CFLAGS) \
>> $(ZLIB_CFLAGS)
>>
>> +tools_i965_disasm_SOURCES = \
>> +   tools/i965_disasm.c \
>> +   tools/i965_disasm.h
>> +
>> +tools_i965_disasm_LDADD = \
>> +   common/libintel_common.la \
>> +   compiler/libintel_compiler.la \
>> +   dev/libintel_dev.la \
>> +   $(top_builddir)/src/util/libmesautil.la \
>> +   $(PTHREAD_LIBS)
>> +
>> +tools_i965_disasm_CFLAGS = \
>> +   $(AM_CFLAGS)
>> +
> 
> Looks good.
> 
>>  tools_error2aub_SOURCES = \
>> tools/gen_context.h \
>> diff --git a/src/intel/tools/i965_disasm.c b/src/intel/tools/i965_disasm.c
>> new file mode 100644
>> index 00..c880559827
>> --- /dev/null
>> +++ b/src/intel/tools/i965_disasm.c
>> @@ -0,0 +1,202 @@
>> +/*
>> + * Copyright © 2018 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include 
>> +#include 
>> +
>> +#include "compiler/brw_inst.h"
>> +#include "compiler/brw_eu.h"
>> +
>> +#include "i965_disasm.h"
>> +
>> +uint64_t INTEL_DEBUG;
>> +uint16_t pci_id = 0;
>> +FILE *outfile;
>> +
>> +struct i965_disasm {
>> +struct gen_device_info devinfo;
>> +};
>> +
>> +/* Return size of file in bytes pointed by fp */
>> +static size_t
>> +i965_disasm_get_file_size(FILE *fp)
>> +{
>> +   size_t size = 0;
> 
> No need for initialization.
> 
>> +
>> +   fseek(fp, 0L, SEEK_END);
>> +   size = ftell(fp);
>> +   fseek(fp, 0L, SEEK_SET);
>> +
>> +   return size;
>> +}
>> +
>> +/* Return number of bytes read */
>> +static size_t
>> +i965_disasm_read_binary(FILE *fp, void **assembly)
>> +{
>> +   size_t end = i965_disasm_get_file_size(fp);
>> +   *assembly = malloc(end + 1);
>> +   fread(*assembly, end, 1, fp);
>> +   fclose(fp);
>> +
>> +   return end;
>> +}
>> +
>> +static void
>> +print_help(const char *progname, FILE *file)
>> +{
>> +   fprintf(file,
>> +   "Usage: %s [OPTION]...\n"
>> +   "Disassemble i965 instructions from binary file.\n\n"
>> +   "  --help display this help and exit\n"
>> +   "  --binary-path=PATH read binary file from binary file 
>> PATH\n"
>> +   "  --gen=platform disassemble instructions for given \n"
>> +   " platform (3 letter platform name)\n",
>> +   progname);
>> +}
>> +
>> +int main(int argc, char *argv[])
>> +{
>> +   FILE *fp = NULL;
>> +   void *assembly = NULL;
>> +   char *binary_path = NULL;
>> +   size_t start = 0, end = 0;
>> +   int c, i;
>> +   struct i965_disasm *disasm;
>> +
>> +   bool help = false;
>> +   const struct option i965_disasm_opts[] = {
>> +  { "help",  no_argument,   (int *) &help,  true },
>> +  { "binary-path",   

Re: [Mesa-dev] [PATCH] compiler: add SYSTEM_VALUE_VARYING_COORD

2018-08-20 Thread Rob Clark
On Mon, Aug 20, 2018 at 6:54 PM Bas Nieuwenhuizen
 wrote:
>
> On Tue, Aug 21, 2018 at 12:38 AM, Marek Olšák  wrote:
> > On Fri, Aug 10, 2018 at 9:26 AM Rob Clark  wrote:
> >>
> >> Used internally in freedreno/ir3 for the vec2 value that hw passes to
> >> shader to use as coordinate for bary.f (varying fetch) instruction.
> >> This is not the same as SYSTEM_VALUE_FRAG_COORD.
> >>
> >> Signed-off-by: Rob Clark 
> >> ---
> >> Up until now, we'd been hard-coding the location of this value (ie. to
> >> r0.xy), mostly because originally in the early a3xx days I didn't know
> >> which bits could configure this value (blob was always using r0.xy so
> >> in cmdstream traces it always showed up as 0's).
> >>
> >> But starting with a6xx, the address register aliases r0.x, which kinda
> >> throws a monkey-wrench in the existing scheme of hard-coding.  The good
> >> news is that I know the bits to configure this value for a3xx-a6xx.
> >>
> >> So I'm shifting over to handling this like a sysval.
> >>
> >>  src/compiler/shader_enums.c| 1 +
> >>  src/compiler/shader_enums.h| 6 ++
> >>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
> >>  3 files changed, 8 insertions(+)
> >>
> >> diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
> >> index a874083a0b7..0210b503d3f 100644
> >> --- a/src/compiler/shader_enums.c
> >> +++ b/src/compiler/shader_enums.c
> >> @@ -244,6 +244,7 @@ gl_system_value_name(gl_system_value sysval)
> >>   ENUM(SYSTEM_VALUE_DEVICE_INDEX),
> >>   ENUM(SYSTEM_VALUE_VIEW_INDEX),
> >>   ENUM(SYSTEM_VALUE_VERTEX_CNT),
> >> + ENUM(SYSTEM_VALUE_VARYING_COORD),
> >> };
> >> STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
> >> return NAME(sysval);
> >> diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
> >> index f8e22925f35..5c36f55283c 100644
> >> --- a/src/compiler/shader_enums.h
> >> +++ b/src/compiler/shader_enums.h
> >> @@ -601,6 +601,12 @@ typedef enum
> >>  */
> >> SYSTEM_VALUE_VERTEX_CNT,
> >>
> >> +   /**
> >> +* Driver internal varying-coord, used for varying-fetch instructions.
> >> +* Not externally visible.
> >> +*/
> >
> > Can you improve the documentation, so that mere mortals understand
> > what it means? Does it correspond to something in AMD hw?
>
> I'd expect this to correspond to stuff like PERSP_CENTER on AMD?
>
> Which begs the question, does this need distinguishing on
> center/centroid/per-sample?

so, adreno, being gles hw, has just "smooth" and "flat" varyings..

that said, I've been kinda thinking of this as an opaque driver
specific sysval without paying too much attention to what the value
actually is other than "the thing you pass to bary.f to get non-flat
varyings"..  I confess to not having looked into how this works on
other hw..

BR,
-R


>
> - Bas
> >
> > Marek
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] gallium/u_transfer_helper: Add support for separate Z24/S8 as well.

2018-08-20 Thread Eric Anholt
Kenneth Graunke  writes:

> u_transfer_helper already had code to handle treating packed Z32_S8
> as separate Z32_FLOAT and S8_UINT resources, since some drivers can't
> handle that interleaved format natively.
>
> Other hardware needs depth and stencil as separate resources for all
> formats.  For example, V3D3 needs this for 24-bit depth as well.
>
> This patch adds a new flag to lower all depth/stencils formats, and
> implements support for Z24_UNORM_S8_UINT.  (S8_UINT_Z24_UNORM is left
> as an exercise to the reader, preferably someone who has access to a
> machine that uses that format.)

It won't be directly usable for V3D, because we only want separate
stencil z24s8 in the MSAA case.  These kinds of complications are why I
wanted a helper library, not a midlayer.

However, this does look good and should bring your project closer to
completion, so:

Reviewed-by: Eric Anholt 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Detect VSX separately from Altivec

2018-08-20 Thread Roland Scheidegger
Alright, I guess it's ok then.
In theory the u_cpu_detect bits could be used in different places, for
instance the translate code emits its own sse code, and as long as a
feature was detected properly it may make sense to disable it only for
some users. Albeit llvm setup and the gallivm code need to agree
generally, and there's no good way to deal with this right now (I
suppose gallivm actually should use its own copy of the u_cpu bits). The
fiddling we do in lp_bld_init() wrt SSE (LP_FORCE_SSE2 and also avx
disabling) isn't a clean way either.
So this looks like as good a solution as others.

Reviewed-by: Roland Scheidegger 

Am 20.08.2018 um 22:15 schrieb Vicki Pfau:
> I was mostly following what was done earlier in the file for Altivec. I
> can move it but then ideally the Altivec check should also be moved.
> 
> 
> Vicki
> 
> 
> On 08/20/2018 08:53 AM, Roland Scheidegger wrote:
>> u_cpu_detect should detect what's really available, not what is used
>> (though indeed we actually disable u_cpu bits explicitly in gallivm for
>> some sse features, but this is a hack).
>> So I think it would be better if u_cpu_detect sets the has_vsx bit
>> regardless what the env var is and then enable it based on this bit and
>> the env var.
>> Otherwise looks good to me.
>>
>> Roland
>>
>> Am 19.08.2018 um 23:17 schrieb Vicki Pfau:
>>> Previously gallivm would attempt to use VSX instructions on all systems
>>> where it detected that Altivec is supported; however, VSX was added to
>>> POWER long after Altivec, causing lots of crashes on older POWER/PPC
>>> hardware, e.g. PPC Macs. By detecting VSX separately from Altivec we can
>>> automatically disable it on hardware that supports Altivec but not VSX
>>>
>>> Signed-off-by: Vicki Pfau 
>>> ---
>>>   src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 21 +++
>>>   src/gallium/auxiliary/util/u_cpu_detect.c | 14 -
>>>   src/gallium/auxiliary/util/u_cpu_detect.h |  1 +
>>>   3 files changed, 17 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
>>> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
>>> index 79dbedbb56..fcbdd5050f 100644
>>> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
>>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
>>> @@ -650,26 +650,11 @@
>>> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
>>>   * which are fixed in LLVM 4.0.
>>>   *
>>>   * With LLVM 4.0 or higher:
>>> -    * Make sure VSX instructions are ENABLED, unless
>>> -    * a) the entire -mattr option is overridden via GALLIVM_MATTRS, or
>>> -    * b) VSX instructions are explicitly enabled/disabled via
>>> GALLIVM_VSX=1 or 0.
>>> +    * Make sure VSX instructions are ENABLED (if supported), unless
>>> +    * VSX instructions are explicitly enabled/disabled via
>>> GALLIVM_VSX=1 or 0.
>>>   */
>>>  if (util_cpu_caps.has_altivec) {
>>> -  char *env_mattrs = getenv("GALLIVM_MATTRS");
>>> -  if (env_mattrs) {
>>> - MAttrs.push_back(env_mattrs);
>>> -  }
>>> -  else {
>>> - boolean enable_vsx = true;
>>> - char *env_vsx = getenv("GALLIVM_VSX");
>>> - if (env_vsx && env_vsx[0] == '0') {
>>> -    enable_vsx = false;
>>> - }
>>> - if (enable_vsx)
>>> -    MAttrs.push_back("+vsx");
>>> - else
>>> -    MAttrs.push_back("-vsx");
>>> -  }
>>> +  MAttrs.push_back(util_cpu_caps.has_vsx ? "+vsx" : "-vsx");
>>>  }
>>>   #endif
>>>   #endif
>>> diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c
>>> b/src/gallium/auxiliary/util/u_cpu_detect.c
>>> index 3c6ae4ea1a..14003aa769 100644
>>> --- a/src/gallium/auxiliary/util/u_cpu_detect.c
>>> +++ b/src/gallium/auxiliary/util/u_cpu_detect.c
>>> @@ -133,6 +133,7 @@ check_os_altivec_support(void)
>>>     signal(SIGILL, SIG_DFL);
>>>  } else {
>>>     boolean enable_altivec = TRUE;    /* Default: enable  if
>>> available, and if not overridden */
>>> +  boolean enable_vsx = TRUE;
>>>   #ifdef DEBUG
>>>     /* Disabling Altivec code generation is not the same as
>>> disabling VSX code generation,
>>>  * which can be done simply by passing -mattr=-vsx to the
>>> LLVM compiler; cf.
>>> @@ -144,6 +145,11 @@ check_os_altivec_support(void)
>>>    enable_altivec = FALSE;
>>>     }
>>>   #endif
>>> +  /* VSX instructions can be explicitly enabled/disabled via
>>> GALLIVM_VSX=1 or 0 */
>>> +  char *env_vsx = getenv("GALLIVM_VSX");
>>> +  if (env_vsx && env_vsx[0] == '0') {
>>> + enable_vsx = FALSE;
>>> +  }
>>>     if (enable_altivec) {
>>>    __lv_powerpc_canjump = 1;
>>>   @@ -153,8 +159,13 @@ check_os_altivec_support(void)
>>>    :
>>>    : "r" (-1));
>>>   - signal(SIGILL, SIG_DFL);
>>>    util_cpu_caps.has_altivec = 1;
>>> +
>>> + if (enable_vsx) {
>>> +    __asm __volatile("xxland %vs0, %vs0, %vs0");

[Mesa-dev] [PATCH v2] intel/tools: new i965_disasm tool

2018-08-20 Thread Sagar Ghuge
Adds a new i965 instruction disassembly tool

v2: 1) fix a few nits (Matt Turner)
2) Remove i965_disasm header (Matt Turner)

Signed-off-by: Sagar Ghuge 
---
 src/intel/Makefile.tools.am   |  14 +++
 src/intel/tools/i965_disasm.c | 199 ++
 src/intel/tools/meson.build   |  11 ++
 3 files changed, 224 insertions(+)
 create mode 100644 src/intel/tools/i965_disasm.c

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index 00624084e6..385819abc2 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -22,6 +22,7 @@
 noinst_PROGRAMS += \
tools/aubinator \
tools/aubinator_error_decode \
+   tools/i965_disasm \
tools/error2aub
 
 
@@ -62,6 +63,19 @@ tools_aubinator_error_decode_CFLAGS = \
$(AM_CFLAGS) \
$(ZLIB_CFLAGS)
 
+tools_i965_disasm_SOURCES = \
+   tools/i965_disasm.c
+
+tools_i965_disasm_LDADD = \
+   common/libintel_common.la \
+   compiler/libintel_compiler.la \
+   dev/libintel_dev.la \
+   $(top_builddir)/src/util/libmesautil.la \
+   $(PTHREAD_LIBS)
+
+tools_i965_disasm_CFLAGS = \
+   $(AM_CFLAGS)
+
 
 tools_error2aub_SOURCES = \
tools/gen_context.h \
diff --git a/src/intel/tools/i965_disasm.c b/src/intel/tools/i965_disasm.c
new file mode 100644
index 00..757d2c7db1
--- /dev/null
+++ b/src/intel/tools/i965_disasm.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <getopt.h>
+
+#include "compiler/brw_inst.h"
+#include "compiler/brw_eu.h"
+#include "dev/gen_device_info.h"
+
+uint64_t INTEL_DEBUG;
+uint16_t pci_id = 0;
+FILE *outfile;
+
+struct i965_disasm {
+struct gen_device_info devinfo;
+};
+
+/* Return size of file in bytes pointed by fp */
+static size_t
+i965_disasm_get_file_size(FILE *fp)
+{
+   size_t size;
+
+   fseek(fp, 0L, SEEK_END);
+   size = ftell(fp);
+   fseek(fp, 0L, SEEK_SET);
+
+   return size;
+}
+
+/* Return number of bytes read */
+static size_t
+i965_disasm_read_binary(FILE *fp, void **assembly)
+{
+   size_t end = i965_disasm_get_file_size(fp);
+   *assembly = malloc(end + 1);
+   fread(*assembly, end, 1, fp);
+   fclose(fp);
+
+   return end;
+}
+
+/* Disassemble i965 instructions from buffer assembly
+ * start : starting offset within buffer
+ * end : points to last byte of buffer
+ */
+static void
+i965_disasm_disassemble(struct i965_disasm *disasm, void *assembly,
+int start, int end, FILE *out)
+{
+   brw_disassemble(&disasm->devinfo, assembly, start, end, out);
+}
+
+static struct i965_disasm *
+i965_disasm_init(void)
+{
+   struct gen_device_info devinfo;
+   struct i965_disasm *i965d;
+
+   i965d = malloc(sizeof *i965d);
+   if (i965d == NULL)
+  return NULL;
+
+   if(!gen_get_device_info(pci_id, &devinfo)) {
+  fprintf(outfile, "can't find device information: pci_id=0x%x\n",
+  pci_id);
+  exit(EXIT_FAILURE);
+   }
+
+   i965d->devinfo = devinfo;
+
+   /* initialize compaction table in order
+* to handle compacted instructions
+*/
+   brw_init_compaction_tables(&i965d->devinfo);
+
+   return i965d;
+}
+
+static void
+i965_disasm_destroy(struct i965_disasm *disasm)
+{
+   free(disasm);
+}
+
+static void
+print_help(const char *progname, FILE *file)
+{
+   fprintf(file,
+   "Usage: %s [OPTION]...\n"
+   "Disassemble i965 instructions from binary file.\n\n"
+   "  --help display this help and exit\n"
+   "  --binary-path=PATH read binary file from binary file PATH\n"
+   "  --gen=platform disassemble instructions for given \n"
+   " platform (3 letter platform name)\n",
+   progname);
+}
+
+int main(int argc, char *argv[])
+{
+   FILE *fp = NULL;
+   void *assembly = NULL;
+   char *binary_path = NULL;
+   size_t start = 0, end = 

Re: [Mesa-dev] [PATCH] i965: Advertise 8 bits subpixel precision for viewport bounds on gen6+

2018-08-20 Thread Andres Gomez
Danylo, should we also include this in the stable queues ?


On Mon, 2018-06-18 at 15:50 +0300, Danylo Piliaiev wrote:
> We use floating-points for viewport bounds so VIEWPORT_SUBPIXEL_BITS
> should reflect this.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105975
> 
> Signed-off-by: Danylo Piliaiev 
> ---
>  src/mesa/drivers/dri/i965/brw_context.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index 9ced230..eacf326 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -688,7 +688,7 @@ brw_initialize_context_constants(struct brw_context *brw)
> /* ARB_viewport_array, OES_viewport_array */
> if (devinfo->gen >= 6) {
>ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
> -  ctx->Const.ViewportSubpixelBits = 0;
> +  ctx->Const.ViewportSubpixelBits = 8;
>  
>/* Cast to float before negating because MaxViewportWidth is unsigned.
> */
-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/12] amd/addrlib: mark *pEqToCheck as MAYBE_UNUSED in Addr::V2::Gfx9Lib::ComputeStereoInfo

2018-08-20 Thread Marek Olšák
On Sat, Aug 18, 2018 at 7:16 AM Kai Wasserbäch
 wrote:
>
> Only used, when asserts are enabled.
>
> Fixes an unused-variable warning with GCC 8:
>  ../../../src/amd/addrlib/gfx9/gfx9addrlib.cpp: In member function 
> 'ADDR_E_RETURNCODE Addr::V2::Gfx9Lib::ComputeStereoInfo(const 
> ADDR2_COMPUTE_SURFACE_INFO_INPUT*, ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*, 
> unsigned int*) const':
>  ../../../src/amd/addrlib/gfx9/gfx9addrlib.cpp:3879:34: warning: unused 
> variable 'pEqToCheck' [-Wunused-variable]
>   const ADDR_EQUATION *pEqToCheck= 
> &m_equationTable[eqIndex];
>^~
>
> Cc: Marek Olšák 
> Signed-off-by: Kai Wasserbäch 
> ---
>  src/amd/addrlib/gfx9/gfx9addrlib.cpp | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp 
> b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
> index 853a041158..524b0f6e82 100644
> --- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
> +++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
> @@ -3873,12 +3873,12 @@ ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
>  {
>  if (IsXor(pIn->swizzleMode))
>  {
> -const UINT_32blkSizeLog2   = 
> GetBlockSizeLog2(pIn->swizzleMode);
> -const UINT_32numPipeBits   = 
> GetPipeXorBits(blkSizeLog2);
> -const UINT_32numBankBits   = 
> GetBankXorBits(blkSizeLog2);
> -const UINT_32bppLog2   = Log2(pIn->bpp >> 3);
> -const UINT_32maxYCoordBlock256 = 
> Log2(Block256_2d[bppLog2].h) - 1;
> -const ADDR_EQUATION *pEqToCheck= 
> &m_equationTable[eqIndex];
> +const UINT_32 blkSizeLog2   = 
> GetBlockSizeLog2(pIn->swizzleMode);
> +const UINT_32 numPipeBits   = 
> GetPipeXorBits(blkSizeLog2);
> +const UINT_32 numBankBits   = 
> GetBankXorBits(blkSizeLog2);
> +const UINT_32 bppLog2   = 
> Log2(pIn->bpp >> 3);
> +const UINT_32 maxYCoordBlock256 = 
> Log2(Block256_2d[bppLog2].h) - 1;

Please don't change the formatting of unrelated code.

Marek

> +MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck= 
> &m_equationTable[eqIndex];
>
>  ADDR_ASSERT(maxYCoordBlock256 ==
>  GetMaxValidChannelIndex(&pEqToCheck->addr[0], 
> GetBlockSizeLog2(ADDR_SW_256B), 1));
> --
> 2.18.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/12] amd/addrlib: mark numPipes as MAYBE_UNUSED in Addr::V1::EgBasedLib::SanityCheckMacroTiled

2018-08-20 Thread Marek Olšák
On Sat, Aug 18, 2018 at 7:16 AM Kai Wasserbäch
 wrote:
>
> Only used, when asserts are enabled.
>
> Fixes an unused-variable warning with GCC 8:
>  ../../../src/amd/addrlib/r800/egbaddrlib.cpp: In member function 'int 
> Addr::V1::EgBasedLib::SanityCheckMacroTiled(ADDR_TILEINFO*) const':
>  ../../../src/amd/addrlib/r800/egbaddrlib.cpp:982:13: warning: unused 
> variable 'numPipes' [-Wunused-variable]
>   UINT_32 numPipes= HwlGetPipes(pTileInfo);
>   ^~~~
>
> Cc: Marek Olšák 
> Signed-off-by: Kai Wasserbäch 
> ---
>  src/amd/addrlib/r800/egbaddrlib.cpp | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp 
> b/src/amd/addrlib/r800/egbaddrlib.cpp
> index 3947cfda2f..37a60c7a85 100644
> --- a/src/amd/addrlib/r800/egbaddrlib.cpp
> +++ b/src/amd/addrlib/r800/egbaddrlib.cpp
> @@ -33,6 +33,8 @@
>
>  #include "egbaddrlib.h"
>
> +#include "util/macros.h"
> +
>  namespace Addr
>  {
>  namespace V1
> @@ -978,8 +980,8 @@ BOOL_32 EgBasedLib::SanityCheckMacroTiled(
>  ADDR_TILEINFO* pTileInfo   ///< [in] macro-tiled parameters
>  ) const
>  {
> -BOOL_32 valid   = TRUE;

Please don't change the formatting of unrelated code.

Marek

> -UINT_32 numPipes= HwlGetPipes(pTileInfo);
> +BOOL_32  valid   = TRUE;
> +MAYBE_UNUSED UINT_32 numPipes= HwlGetPipes(pTileInfo);
>
>  switch (pTileInfo->banks)
>  {
> --
> 2.18.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC][PATCH 3/5] mesa: Add support for AMD_depth_clamp_separate

2018-08-20 Thread Marek Olšák
I wouldn't add _DepthClamp. Having just DepthClampNear and
DepthClampFar should be enough. Drivers not supporting the extension
can use either variable, because they will be equal.

The glGet query can be handled as LOC_CUSTOM.

Marek

On Sun, Aug 19, 2018 at 6:43 PM Sagar Ghuge  wrote:
>
>
>
> On 08/13/2018 03:52 PM, Ian Romanick wrote:
> > On 08/09/2018 01:09 PM, Marek Olšák wrote:
> >> On Wed, Aug 1, 2018 at 11:31 PM, Sagar Ghuge  wrote:
> >>> enable _mesa_PushAttrib() and _mesa_PopAttrib()
> >>> to handle GL_DEPTH_CLAMP_NEAR_AMD and
> >>> GL_DEPTH_CLAMP_FAR_AMD tokens.
> >>>
> >>> Signed-off-by: Sagar Ghuge 
> >>> ---
> >>>  src/mesa/main/attrib.c | 16 
> >>>  1 file changed, 16 insertions(+)
> >>>
> >>> diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
> >>> index cbe93ab6fa..d9f165b428 100644
> >>> --- a/src/mesa/main/attrib.c
> >>> +++ b/src/mesa/main/attrib.c
> >>> @@ -73,6 +73,8 @@ struct gl_enable_attrib
> >>> GLboolean ColorMaterial;
> >>> GLboolean CullFace;
> >>> GLboolean DepthClamp;
> >>> +   GLboolean DepthClampNear;
> >>> +   GLboolean DepthClampFar;
> >>
> >> The first patch uses this. Also, DepthClamp can be removed, because
> >> DepthClampNear+Far replace it, right?
> >
> > Based on your comment on patch 4 and my comments on patch 0, maybe we
> > should:
> >
> > - Remove DepthClamp.  Add _DepthClamp, DepthClampNear, and DepthClampFar.
>
> I might be missing some pieces, but DepthClampNear + DepthClampFar can
> replace DepthClamp, so why do we need _DepthClamp? (Adding _DepthClamp
> means it will be derived from DepthClampNear + DepthClampFar, correct? And
> does removing DepthClamp here mean we need to completely get rid of every
> reference to DepthClamp in the source code?)
>
> >
> > - If GL_DEPTH_CLAMP is set, set all three.  If GL_DEPTH_CLAMP is
> > cleared, clear all three.
> >
> > - If either of GL_DEPTH_CLAMP_FAR_AMD or GL_DEPTH_CLAMP_NEAR_AMD
> > changes, change _DepthClamp to DepthClampNear || DepthClampFar.
> >
>
> We only need to handle this case - "Querying DEPTH_CLAMP will return TRUE if 
> DEPTH_CLAMP_NEAR_AMD _or_
> DEPTH_CLAMP_FAR_AMD is enabled."
> I think we don't have to keep changing _DepthClamp, because if we do it
> then it will enable depth clamping for both the planes and will get different 
> behavior.
> Please correct me if I am wrong or missing anything.
>
> > - Drivers that enable AMD_depth_clamp_separate will only ever look at
> > DepthClampNear and DepthClampFar.
> >
> > I think that gets all the cases correct with the minimum fuss.  Marek,
> > what do you think?
> >
> >> Marek
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: drop wrong initialization of COMPUTE_RESOURCE_LIMITS

2018-08-20 Thread Marek Olšák
You are lucky that WAVES_PER_SH was not 3. Such a low limit could
decrease compute shader performance to ~2%.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104843] Mesa version not updated for rebuild

2018-08-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104843

Timothy Arceri  changed:

   What|Removed |Added

 Status|NEW |NEEDINFO

--- Comment #1 from Timothy Arceri  ---
Hi Mark/Dylan, any news on this? Is this bug report still valid?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/8] mesa: expose ARB_post_depth_coverage in the Compatibility profile

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

It only contains GLSL changes.

v2: allow the layout qualifier on GLSL <= 1.30
---
 src/compiler/glsl/glsl_lexer.ll  | 1 +
 src/mesa/main/extensions_table.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
index 87b64e09c16..964e937a481 100644
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -490,20 +490,21 @@ structreturn STRUCT;
 void   return VOID_TOK;
 
 layout {
  if ((yyextra->is_version(140, 300))
  || yyextra->ARB_bindless_texture_enable
  || yyextra->KHR_blend_equation_advanced_enable
  || yyextra->AMD_conservative_depth_enable
  || yyextra->ARB_conservative_depth_enable
  || yyextra->ARB_explicit_attrib_location_enable
  || yyextra->ARB_explicit_uniform_location_enable
+  || yyextra->ARB_post_depth_coverage
   || yyextra->has_separate_shader_objects()
  || yyextra->ARB_uniform_buffer_object_enable
  || yyextra->ARB_fragment_coord_conventions_enable
   || yyextra->ARB_shading_language_420pack_enable
   || yyextra->ARB_compute_shader_enable
   || yyextra->ARB_tessellation_shader_enable
   || 
yyextra->EXT_shader_framebuffer_fetch_non_coherent_enable) {
  return LAYOUT_TOK;
   } else {
  return classify_identifier(yyextra, yytext, yyleng, 
yylval);
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index af5ce118da4..3bd824b2dfe 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -92,21 +92,21 @@ EXT(ARB_multi_bind  , dummy_true
 EXT(ARB_multi_draw_indirect , ARB_draw_indirect
  , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_multisample , dummy_true   
  , GLL,  x ,  x ,  x , 1994)
 EXT(ARB_multitexture, dummy_true   
  , GLL,  x ,  x ,  x , 1998)
 EXT(ARB_occlusion_query , ARB_occlusion_query  
  , GLL,  x ,  x ,  x , 2001)
 EXT(ARB_occlusion_query2, ARB_occlusion_query2 
  , GLL, GLC,  x ,  x , 2003)
 EXT(ARB_pipeline_statistics_query   , ARB_pipeline_statistics_query
  , GLL, GLC,  x ,  x , 2014)
 EXT(ARB_pixel_buffer_object , EXT_pixel_buffer_object  
  , GLL, GLC,  x ,  x , 2004)
 EXT(ARB_point_parameters, EXT_point_parameters 
  , GLL,  x ,  x ,  x , 1997)
 EXT(ARB_point_sprite, ARB_point_sprite 
  , GLL, GLC,  x ,  x , 2003)
 EXT(ARB_polygon_offset_clamp, ARB_polygon_offset_clamp 
  , GLL, GLC,  x ,  x , 2017)
-EXT(ARB_post_depth_coverage , ARB_post_depth_coverage  
  ,  x , GLC,  x ,  x,  2015)
+EXT(ARB_post_depth_coverage , ARB_post_depth_coverage  
  , GLL, GLC,  x ,  x,  2015)
 EXT(ARB_program_interface_query , dummy_true   
  , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_provoking_vertex, EXT_provoking_vertex 
  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_query_buffer_object , ARB_query_buffer_object  
  , GLL, GLC,  x ,  x , 2013)
 EXT(ARB_robust_buffer_access_behavior   , 
ARB_robust_buffer_access_behavior  , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_robustness  , dummy_true   
  , GLL, GLC,  x ,  x , 2010)
 EXT(ARB_sample_locations, ARB_sample_locations 
  , GLL, GLC,  x ,  x , 2015)
 EXT(ARB_sample_shading  , ARB_sample_shading   
  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_sampler_objects , dummy_true   
  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_seamless_cube_map   , ARB_seamless_cube_map
  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_seamless_cubemap_per_texture, AMD_seamless_cubemap_per_texture 
  , GLL, GLC,  x ,  x , 2013)
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/11] mesa: expose EXT_texture_buffer_object

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

This is needed for exposing the samplerBuffer functions under
EXT_gpu_shader4.

v2: - expose it in the compat profile only
- make it an alias of EXT_gpu_shader4

Reviewed-by: Timothy Arceri  (v1)
---
 docs/relnotes/18.3.0.html| 1 +
 src/mesa/main/extensions_table.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
index ac2cc1e893b..3bb6e253252 100644
--- a/docs/relnotes/18.3.0.html
+++ b/docs/relnotes/18.3.0.html
@@ -45,20 +45,21 @@ TBD.
 
 
 New features
 
 
 Note: some of the new features are only available with certain drivers.
 
 
 
 GL_AMD_framebuffer_multisample_advanced on radeonsi.
+GL_EXT_texture_buffer_object on i965, nv50, nvc0, r600, radeonsi.
 GL_EXT_window_rectangles on radeonsi.
 
 
 Bug fixes
 
 
 TBD
 
 
 Changes
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index af5ce118da4..af5edb35051 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -264,20 +264,21 @@ EXT(EXT_shadow_funcs, ARB_shadow
 EXT(EXT_stencil_two_side, EXT_stencil_two_side 
  , GLL,  x ,  x ,  x , 2001)
 EXT(EXT_stencil_wrap, dummy_true   
  , GLL,  x ,  x ,  x , 2002)
 EXT(EXT_subtexture  , dummy_true   
  , GLL,  x ,  x ,  x , 1995)
 EXT(EXT_tessellation_point_size , ARB_tessellation_shader  
  ,  x ,  x ,  x ,  31, 2013)
 EXT(EXT_tessellation_shader , ARB_tessellation_shader  
  ,  x ,  x ,  x ,  31, 2013)
 EXT(EXT_texture , dummy_true   
  , GLL,  x ,  x ,  x , 1996)
 EXT(EXT_texture3D   , dummy_true   
  , GLL,  x ,  x ,  x , 1996)
 EXT(EXT_texture_array   , EXT_texture_array
  , GLL, GLC,  x ,  x , 2006)
 EXT(EXT_texture_border_clamp, ARB_texture_border_clamp 
  ,  x ,  x ,  x , ES2, 2014)
 EXT(EXT_texture_buffer  , OES_texture_buffer   
  ,  x ,  x ,  x ,  31, 2014)
+EXT(EXT_texture_buffer_object   , EXT_gpu_shader4  
  , GLL,  x ,  x ,  x , 2007)
 EXT(EXT_texture_compression_dxt1, ANGLE_texture_compression_dxt
  , GLL, GLC, ES1, ES2, 2004)
 EXT(EXT_texture_compression_latc, EXT_texture_compression_latc 
  , GLL,  x ,  x ,  x , 2006)
 EXT(EXT_texture_compression_rgtc, ARB_texture_compression_rgtc 
  , GLL, GLC,  x ,  x , 2004)
 EXT(EXT_texture_compression_s3tc, EXT_texture_compression_s3tc 
  , GLL, GLC,  x ,  x , 2000)
 EXT(EXT_texture_cube_map, ARB_texture_cube_map 
  , GLL,  x ,  x ,  x , 2001)
 EXT(EXT_texture_cube_map_array  , OES_texture_cube_map_array   
  ,  x ,  x ,  x ,  31, 2014)
 EXT(EXT_texture_edge_clamp  , dummy_true   
  , GLL,  x ,  x ,  x , 1997)
 EXT(EXT_texture_env_add , dummy_true   
  , GLL,  x ,  x ,  x , 1999)
 EXT(EXT_texture_env_combine , dummy_true   
  , GLL,  x ,  x ,  x , 2000)
 EXT(EXT_texture_env_dot3, EXT_texture_env_dot3 
  , GLL,  x ,  x ,  x , 2000)
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: only allow EXT_gpu_shader4 in the compatibility profile

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/mesa/main/extensions_table.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index af5edb35051..9f9038f97c6 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -224,21 +224,21 @@ EXT(EXT_draw_range_elements , dummy_true
 EXT(EXT_fog_coord   , dummy_true   
  , GLL,  x ,  x ,  x , 1999)
 EXT(EXT_frag_depth  , dummy_true   
  ,  x ,  x ,  x , ES2, 2010)
 EXT(EXT_framebuffer_blit, dummy_true   
  , GLL, GLC,  x ,  x , 2005)
 EXT(EXT_framebuffer_multisample , EXT_framebuffer_multisample  
  , GLL, GLC,  x ,  x , 2005)
 EXT(EXT_framebuffer_multisample_blit_scaled , 
EXT_framebuffer_multisample_blit_scaled, GLL, GLC,  x ,  x , 2011)
 EXT(EXT_framebuffer_object  , dummy_true   
  , GLL,  x ,  x ,  x , 2000)
 EXT(EXT_framebuffer_sRGB, EXT_framebuffer_sRGB 
  , GLL, GLC,  x ,  x , 1998)
 EXT(EXT_geometry_point_size , OES_geometry_shader  
  ,  x ,  x ,  x ,  31, 2015)
 EXT(EXT_geometry_shader , OES_geometry_shader  
  ,  x ,  x ,  x ,  31, 2015)
 EXT(EXT_gpu_program_parameters  , EXT_gpu_program_parameters   
  , GLL,  x ,  x ,  x , 2006)
-EXT(EXT_gpu_shader4 , EXT_gpu_shader4  
  , GLL, GLC,  x ,  x , 2006)
+EXT(EXT_gpu_shader4 , EXT_gpu_shader4  
  , GLL,  x ,  x ,  x , 2006)
 EXT(EXT_gpu_shader5 , ARB_gpu_shader5  
  ,  x ,  x ,  x ,  31, 2014)
 EXT(EXT_map_buffer_range, ARB_map_buffer_range 
  ,  x ,  x , ES1, ES2, 2012)
 EXT(EXT_memory_object   , EXT_memory_object
  , GLL, GLC,  x , ES2, 2017)
 EXT(EXT_memory_object_fd, EXT_memory_object_fd 
  , GLL, GLC,  x , ES2, 2017)
 EXT(EXT_multi_draw_arrays   , dummy_true   
  , GLL,  x , ES1, ES2, 1999)
 EXT(EXT_occlusion_query_boolean , ARB_occlusion_query  
  ,  x ,  x ,  x , ES2, 2001)
 EXT(EXT_packed_depth_stencil, dummy_true   
  , GLL, GLC,  x ,  x , 2005)
 EXT(EXT_packed_float, EXT_packed_float 
  , GLL, GLC,  x ,  x , 2004)
 EXT(EXT_packed_pixels   , dummy_true   
  , GLL,  x ,  x ,  x , 1997)
 EXT(EXT_pixel_buffer_object , EXT_pixel_buffer_object  
  , GLL, GLC,  x ,  x , 2004)
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107224] Incorrect Rendering in Deus Ex: Mankind Divided in-game menu

2018-08-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107224

--- Comment #11 from Timothy Arceri  ---
(In reply to Alex Smith from comment #10)
> We've just released a game data update that should fix this issue for both
> AMD and Intel, as well as a shader compilation failure on 18.2. It should be
> applied next time you launch the game (it's handled outside of Steam, will
> be automatically downloaded at startup).

Thanks! I can confirm all issues are gone on both AMD and Intel.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 103385] Cross compilation fails with undefined reference

2018-08-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103385

Timothy Arceri  changed:

   What|Removed |Added

 Resolution|--- |WONTFIX
 Status|NEW |RESOLVED

--- Comment #2 from Timothy Arceri  ---
(In reply to bkarthikk93 from comment #0)
> I have been trying to cross compile Mesa 13.0.6 and 12.0.6 for an arm
> platform. Both compilation is failing with the following error 
> 
>  Making all in gbm
> make[3]: Entering directory
> '/opt/qt/qt5.9.1arm_source/dependencies/mesa/mesa-13.0.6/src/gbm'
>   CCLD libgbm.la
> ../../src/loader/.libs/libloader.a(libxmlconfig_la-xmlconfig.o): In function
> `__getProgramName':
> xmlconfig.c:(.text+0x78): undefined reference to `__progname'
> collect2: error: ld returned 1 exit status
> Makefile:847: recipe for target 'libgbm.la' failed
> make[3]: *** [libgbm.la] Error 1

You need to make sure correct support is offered in xmlconfig.c for your
platform/compiler. Patches welcome and can be sent to the mesa-dev mailing
list. 

For now I'm going to close this bug as wont fix.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/8] mesa: expose AMD_gpu_shader_int64

2018-08-20 Thread Marek Olšák
On Wed, Aug 15, 2018 at 2:25 PM Ian Romanick  wrote:
>
> On 08/08/2018 07:12 PM, Marek Olšák wrote:
> > From: Marek Olšák 
> >
> > because the closed driver exposes it.
> >
> > It's equivalent to ARB_gpu_shader_int64.
> > In this patch, I did everything the same as we do for ARB_gpu_shader_int64.
>
> The closed driver only exposes this on core profile?  That's unexpected.

The closed driver exposes it in both profiles, but Mesa needs display
support before it can expose it in the compatibility profile.

Marek

>
> Either way, this patch is
>
> Reviewed-by: Ian Romanick 
>
> > ---
> >  docs/relnotes/18.3.0.html   |   1 +
> >  src/compiler/glsl/builtin_functions.cpp |   3 +-
> >  src/compiler/glsl/builtin_types.cpp |   3 +-
> >  src/compiler/glsl/glsl_lexer.ll |  18 +-
> >  src/compiler/glsl/glsl_parser_extras.cpp|   1 +
> >  src/compiler/glsl/glsl_parser_extras.h  |   5 +-
> >  src/mapi/glapi/gen/AMD_gpu_shader_int64.xml | 239 
> >  src/mapi/glapi/gen/gl_API.xml   |   2 +
> >  src/mesa/main/extensions_table.h|   1 +
> >  9 files changed, 261 insertions(+), 12 deletions(-)
> >  create mode 100644 src/mapi/glapi/gen/AMD_gpu_shader_int64.xml
> >
> > diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
> > index ac2cc1e893b..840a336d41a 100644
> > --- a/docs/relnotes/18.3.0.html
> > +++ b/docs/relnotes/18.3.0.html
> > @@ -45,20 +45,21 @@ TBD.
> >
> >
> >  New features
> >
> >  
> >  Note: some of the new features are only available with certain drivers.
> >  
> >
> >  
> >  GL_AMD_framebuffer_multisample_advanced on radeonsi.
> > +GL_AMD_gpu_shader_int64 on i965, nvc0, radeonsi.
> >  GL_EXT_window_rectangles on radeonsi.
> >  
> >
> >  Bug fixes
> >
> >  
> >  TBD
> >  
> >
> >  Changes
> > diff --git a/src/compiler/glsl/builtin_functions.cpp 
> > b/src/compiler/glsl/builtin_functions.cpp
> > index 7119903795f..e37d96c4636 100644
> > --- a/src/compiler/glsl/builtin_functions.cpp
> > +++ b/src/compiler/glsl/builtin_functions.cpp
> > @@ -522,21 +522,22 @@ supports_arb_fragment_shader_interlock(const 
> > _mesa_glsl_parse_state *state)
> >  static bool
> >  shader_clock(const _mesa_glsl_parse_state *state)
> >  {
> > return state->ARB_shader_clock_enable;
> >  }
> >
> >  static bool
> >  shader_clock_int64(const _mesa_glsl_parse_state *state)
> >  {
> > return state->ARB_shader_clock_enable &&
> > -  state->ARB_gpu_shader_int64_enable;
> > +  (state->ARB_gpu_shader_int64_enable ||
> > +   state->AMD_gpu_shader_int64_enable);
> >  }
> >
> >  static bool
> >  shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
> >  {
> > return state->has_shader_storage_buffer_objects();
> >  }
> >
> >  static bool
> >  shader_trinary_minmax(const _mesa_glsl_parse_state *state)
> > diff --git a/src/compiler/glsl/builtin_types.cpp 
> > b/src/compiler/glsl/builtin_types.cpp
> > index 7a01cb48bc8..d2bcdd68138 100644
> > --- a/src/compiler/glsl/builtin_types.cpp
> > +++ b/src/compiler/glsl/builtin_types.cpp
> > @@ -404,21 +404,22 @@ _mesa_glsl_initialize_types(struct 
> > _mesa_glsl_parse_state *state)
> >add_type(symbols, glsl_type::dmat3_type);
> >add_type(symbols, glsl_type::dmat4_type);
> >add_type(symbols, glsl_type::dmat2x3_type);
> >add_type(symbols, glsl_type::dmat2x4_type);
> >add_type(symbols, glsl_type::dmat3x2_type);
> >add_type(symbols, glsl_type::dmat3x4_type);
> >add_type(symbols, glsl_type::dmat4x2_type);
> >add_type(symbols, glsl_type::dmat4x3_type);
> > }
> >
> > -   if (state->ARB_gpu_shader_int64_enable) {
> > +   if (state->ARB_gpu_shader_int64_enable ||
> > +   state->AMD_gpu_shader_int64_enable) {
> >add_type(symbols, glsl_type::int64_t_type);
> >add_type(symbols, glsl_type::i64vec2_type);
> >add_type(symbols, glsl_type::i64vec3_type);
> >add_type(symbols, glsl_type::i64vec4_type);
> >
> >add_type(symbols, glsl_type::uint64_t_type);
> >add_type(symbols, glsl_type::u64vec2_type);
> >add_type(symbols, glsl_type::u64vec3_type);
> >add_type(symbols, glsl_type::u64vec4_type);
> > }
> > diff --git a/src/compiler/glsl/glsl_lexer.ll 
> > b/src/compiler/glsl/glsl_lexer.ll
> > index 87b64e09c16..74d4acca61f 100644
> > --- a/src/compiler/glsl/glsl_lexer.ll
> > +++ b/src/compiler/glsl/glsl_lexer.ll
> > @@ -656,29 +656,29 @@ isampler2DRect  TYPE(140, 300, 140, 0, 
> > glsl_type::isampler2DRect_type);
> >  usampler2DRect   TYPE(140, 300, 140, 0, 
> > glsl_type::usampler2DRect_type);
> >  isamplerBuffer   TYPE_WITH_ALT(140, 300, 140, 320, 
> > yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, 
> > glsl_type::isamplerBuffer_type);
> >  usamplerBuffer   TYPE_WITH_ALT(140, 300, 140, 320, 
> > yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, 
> > glsl_type::usamplerBuffer_type);
> 

[Mesa-dev] [PATCH 5/9] ac: add ac_build_s_barrier

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_llvm_build.c   | 6 ++
 src/amd/common/ac_llvm_build.h   | 1 +
 src/amd/common/ac_nir_to_llvm.c  | 3 +--
 src/gallium/drivers/radeonsi/si_shader.c | 4 +---
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 87e36df6431..c89bdf49faf 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -341,20 +341,26 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char 
*buf, unsigned bufsize)
 LLVMValueRef
 ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
 unsigned count_incoming, LLVMValueRef *values,
 LLVMBasicBlockRef *blocks)
 {
LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
LLVMAddIncoming(phi, values, blocks, count_incoming);
return phi;
 }
 
+void ac_build_s_barrier(struct ac_llvm_context *ctx)
+{
+   ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL,
+  0, AC_FUNC_ATTR_CONVERGENT);
+}
+
 /* Prevent optimizations (at least of memory accesses) across the current
  * point in the program by emitting empty inline assembly that is marked as
  * having side effects.
  *
  * Optionally, a value can be passed through the inline assembly to prevent
  * LLVM from hoisting calls to ReadNone functions.
  */
 void
 ac_build_optimization_barrier(struct ac_llvm_context *ctx,
  LLVMValueRef *pvgpr)
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index c5753037e7b..84212f0d459 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -126,20 +126,21 @@ ac_build_intrinsic(struct ac_llvm_context *ctx, const 
char *name,
   LLVMTypeRef return_type, LLVMValueRef *params,
   unsigned param_count, unsigned attrib_mask);
 
 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned 
bufsize);
 
 LLVMValueRef
 ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
 unsigned count_incoming, LLVMValueRef *values,
 LLVMBasicBlockRef *blocks);
 
+void ac_build_s_barrier(struct ac_llvm_context *ctx);
 void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
   LLVMValueRef *pvgpr);
 
 LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx);
 
 LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
 
 LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef 
value);
 
 LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef 
value);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index cffc980e51f..1584fef7ab7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2575,22 +2575,21 @@ static void emit_membar(struct ac_llvm_context *ac,
 void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
 {
/* SI only (thanks to a hw bug workaround):
 * The real barrier instruction isn’t needed, because an entire patch
 * always fits into a single wave.
 */
if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) {
ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
return;
}
-   ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier",
-  ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
+   ac_build_s_barrier(ac);
 }
 
 static void emit_discard(struct ac_nir_context *ctx,
 const nir_intrinsic_instr *instr)
 {
LLVMValueRef cond;
 
if (instr->intrinsic == nir_intrinsic_discard_if) {
cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
 get_src(ctx, instr->src[0]),
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 81c825db1e4..29523474735 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4397,23 +4397,21 @@ static void si_llvm_emit_barrier(const struct 
lp_build_tgsi_action *action,
/* SI only (thanks to a hw bug workaround):
 * The real barrier instruction isn’t needed, because an entire patch
 * always fits into a single wave.
 */
if (ctx->screen->info.chip_class == SI &&
ctx->type == PIPE_SHADER_TESS_CTRL) {
ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT);
return;
}
 
-   ac_build_intrinsic(&ctx->ac,
-  "llvm.amdgcn.s.barrier",
-  ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
+   ac_build_s_barrier(&ctx->ac);
 }
 
 static void si_create_function(struct si_shader_context *ctx,
   const char *name,
   LLVMTypeRef *returns, unsigned num_returns,
   

[Mesa-dev] [PATCH 6/9] ac: add imad & fmad helpers

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_llvm_build.c | 14 ++
 src/amd/common/ac_llvm_build.h |  4 
 2 files changed, 18 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index c89bdf49faf..6d5bfb1a1be 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1962,20 +1962,34 @@ LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, 
LLVMValueRef input,
width,
};
 
return ac_build_intrinsic(ctx,
  is_signed ? "llvm.amdgcn.sbfe.i32" :
  "llvm.amdgcn.ubfe.i32",
  ctx->i32, args, 3,
  AC_FUNC_ATTR_READNONE);
 }
 
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+  LLVMValueRef s1, LLVMValueRef s2)
+{
+   return LLVMBuildAdd(ctx->builder,
+   LLVMBuildMul(ctx->builder, s0, s1, ""), s2, "");
+}
+
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+  LLVMValueRef s1, LLVMValueRef s2)
+{
+   return LLVMBuildFAdd(ctx->builder,
+LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
+}
+
 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
 {
LLVMValueRef args[1] = {
LLVMConstInt(ctx->i32, simm16, false),
};
ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
   ctx->voidt, args, 1, 0);
 }
 
 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 84212f0d459..d11a6ebb1be 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -391,20 +391,24 @@ LLVMValueRef ac_build_cvt_pknorm_u16(struct 
ac_llvm_context *ctx,
 LLVMValueRef args[2]);
 LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
 LLVMValueRef args[2], unsigned bits, bool hi);
 LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
 LLVMValueRef args[2], unsigned bits, bool hi);
 LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
 void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
  LLVMValueRef offset, LLVMValueRef width,
  bool is_signed);
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+  LLVMValueRef s1, LLVMValueRef s2);
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+  LLVMValueRef s1, LLVMValueRef s2);
 
 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
 
 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
   unsigned bitsize);
 
 LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize);
 
 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/11] glsl: enable types for EXT_gpu_shader4

2018-08-20 Thread Marek Olšák
On Mon, Aug 13, 2018 at 1:42 PM Ian Romanick  wrote:
>
> On 08/07/2018 10:42 PM, Marek Olšák wrote:
> > From: Chris Forbes 
> >
> > ---
> >  src/compiler/glsl/builtin_types.cpp | 32 ++
> >  src/compiler/glsl/glsl_lexer.ll | 50 ++---
> >  2 files changed, 57 insertions(+), 25 deletions(-)
> >
> > diff --git a/src/compiler/glsl/builtin_types.cpp 
> > b/src/compiler/glsl/builtin_types.cpp
> > index 7a01cb48bc8..2e141447252 100644
> > --- a/src/compiler/glsl/builtin_types.cpp
> > +++ b/src/compiler/glsl/builtin_types.cpp
> > @@ -317,20 +317,52 @@ _mesa_glsl_initialize_types(struct 
> > _mesa_glsl_parse_state *state)
> >add_type(symbols, glsl_type::sampler2DMSArray_type);
> >add_type(symbols, glsl_type::isampler2DMSArray_type);
> >add_type(symbols, glsl_type::usampler2DMSArray_type);
> > }
> >
> > if (state->ARB_texture_rectangle_enable) {
> >add_type(symbols, glsl_type::sampler2DRect_type);
> >add_type(symbols, glsl_type::sampler2DRectShadow_type);
> > }
> >
> > +   if (state->EXT_gpu_shader4_enable) {
> > +  add_type(symbols, glsl_type::uint_type);
> > +  add_type(symbols, glsl_type::uvec2_type);
> > +  add_type(symbols, glsl_type::uvec3_type);
> > +  add_type(symbols, glsl_type::uvec4_type);
> > +
> > +  add_type(symbols, glsl_type::sampler1DArray_type);
> > +  add_type(symbols, glsl_type::sampler2DArray_type);
> > +  add_type(symbols, glsl_type::sampler1DArrayShadow_type);
> > +  add_type(symbols, glsl_type::sampler2DArrayShadow_type);
> > +  add_type(symbols, glsl_type::samplerCubeShadow_type);
> > +  add_type(symbols, glsl_type::samplerBuffer_type);
> > +
> > +  add_type(symbols, glsl_type::isampler1D_type);
> > +  add_type(symbols, glsl_type::isampler2D_type);
> > +  add_type(symbols, glsl_type::isampler3D_type);
> > +  add_type(symbols, glsl_type::isamplerCube_type);
> > +  add_type(symbols, glsl_type::isampler2DRect_type);
> > +  add_type(symbols, glsl_type::isampler1DArray_type);
> > +  add_type(symbols, glsl_type::isampler2DArray_type);
> > +  add_type(symbols, glsl_type::isamplerBuffer_type);
> > +
> > +  add_type(symbols, glsl_type::usampler1D_type);
> > +  add_type(symbols, glsl_type::usampler2D_type);
> > +  add_type(symbols, glsl_type::usampler3D_type);
> > +  add_type(symbols, glsl_type::usamplerCube_type);
> > +  add_type(symbols, glsl_type::usampler2DRect_type);
> > +  add_type(symbols, glsl_type::usampler1DArray_type);
> > +  add_type(symbols, glsl_type::usampler2DArray_type);
> > +  add_type(symbols, glsl_type::usamplerBuffer_type);
> > +   }
> > +
> > if (state->EXT_texture_array_enable) {
> >add_type(symbols, glsl_type::sampler1DArray_type);
> >add_type(symbols, glsl_type::sampler2DArray_type);
> >add_type(symbols, glsl_type::sampler1DArrayShadow_type);
> >add_type(symbols, glsl_type::sampler2DArrayShadow_type);
> > }
> >
> > if (state->OES_EGL_image_external_enable ||
> > state->OES_EGL_image_external_essl3_enable) {
> >add_type(symbols, glsl_type::samplerExternalOES_type);
> > diff --git a/src/compiler/glsl/glsl_lexer.ll 
> > b/src/compiler/glsl/glsl_lexer.ll
> > index 65c5b414a2d..67c33d6f1c9 100644
> > --- a/src/compiler/glsl/glsl_lexer.ll
> > +++ b/src/compiler/glsl/glsl_lexer.ll
> > @@ -347,23 +347,23 @@ for return FOR;
> >  if   return IF;
> >  discard  return DISCARD;
> >  return   return RETURN;
> >
> >  bvec2{ yylval->type = glsl_type::bvec2_type; return 
> > BASIC_TYPE_TOK; }
> >  bvec3{ yylval->type = glsl_type::bvec3_type; return 
> > BASIC_TYPE_TOK; }
> >  bvec4{ yylval->type = glsl_type::bvec4_type; return 
> > BASIC_TYPE_TOK; }
> >  ivec2{ yylval->type = glsl_type::ivec2_type; return 
> > BASIC_TYPE_TOK; }
> >  ivec3{ yylval->type = glsl_type::ivec3_type; return 
> > BASIC_TYPE_TOK; }
> >  ivec4{ yylval->type = glsl_type::ivec4_type; return 
> > BASIC_TYPE_TOK; }
> > -uvec2TYPE(130, 300, 130, 300, glsl_type::uvec2_type);
> > -uvec3TYPE(130, 300, 130, 300, glsl_type::uvec3_type);
> > -uvec4TYPE(130, 300, 130, 300, glsl_type::uvec4_type);
> > +uvec2TYPE_WITH_ALT(130, 300, 130, 300, 
> > yyextra->EXT_gpu_shader4_enable, glsl_type::uvec2_type);
> > +uvec3TYPE_WITH_ALT(130, 300, 130, 300, 
> > yyextra->EXT_gpu_shader4_enable, glsl_type::uvec3_type);
> > +uvec4TYPE_WITH_ALT(130, 300, 130, 300, 
> > yyextra->EXT_gpu_shader4_enable, glsl_type::uvec4_type);
> >  vec2 { yylval->type = glsl_type::vec2_type; return BASIC_TYPE_TOK; 
> > }
> >  vec3 { yylval->type = glsl_type::vec3_type; return BASIC_TYPE_TOK; 
> > }
> >  vec4 { yylval->type = glsl_type::vec4_type; return 

Re: [Mesa-dev] [RFC][PATCH 3/5] mesa: Add support for AMD_depth_clamp_separate

2018-08-20 Thread Ian Romanick
On 08/20/2018 04:02 PM, Marek Olšák wrote:
> I wouldn't add _DepthClamp. Having just DepthClampNear and
> DepthClampFar should be enough. Drivers not supporting the extension
> can use either variable, because they will be equal.
> 
> The glGet query can be handled as LOC_CUSTOM.

Yeah, that works for me.

> Marek
> 
> On Sun, Aug 19, 2018 at 6:43 PM Sagar Ghuge  wrote:
>>
>>
>>
>> On 08/13/2018 03:52 PM, Ian Romanick wrote:
>>> On 08/09/2018 01:09 PM, Marek Olšák wrote:
>>>> On Wed, Aug 1, 2018 at 11:31 PM, Sagar Ghuge  wrote:
>>>>> enable _mesa_PushAttrib() and _mesa_PopAttrib()
>>>>> to handle GL_DEPTH_CLAMP_NEAR_AMD and
>>>>> GL_DEPTH_CLAMP_FAR_AMD tokens.
>>>>>
>>>>> Signed-off-by: Sagar Ghuge 
>>>>> ---
>>>>>  src/mesa/main/attrib.c | 16 ++++++++++++++++
>>>>>  1 file changed, 16 insertions(+)
>>>>>
>>>>> diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
>>>>> index cbe93ab6fa..d9f165b428 100644
>>>>> --- a/src/mesa/main/attrib.c
>>>>> +++ b/src/mesa/main/attrib.c
>>>>> @@ -73,6 +73,8 @@ struct gl_enable_attrib
>>>>> GLboolean ColorMaterial;
>>>>> GLboolean CullFace;
>>>>> GLboolean DepthClamp;
>>>>> +   GLboolean DepthClampNear;
>>>>> +   GLboolean DepthClampFar;
>>>>
>>>> The first patch uses this. Also, DepthClamp can be removed, because
>>>> DepthClampNear+Far replace it, right?
>>>
>>> Based on your comment on patch 4 and my comments on patch 0, maybe we
>>> should:
>>>
>>> - Remove DepthClamp.  Add _DepthClamp, DepthClampNear, and DepthClampFar.
>>
>> I might be missing some pieces, but DepthClampNear + DepthClampFar can
>> replace DepthClamp, so why do we need _DepthClamp? (Adding _DepthClamp
>> means it will be derived from DepthClampNear + DepthClampFar, correct?
>> Does removing DepthClamp here mean we need to completely get rid of
>> every reference to DepthClamp in the source code?)
>>
>>>
>>> - If GL_DEPTH_CLAMP is set, set all three.  If GL_DEPTH_CLAMP is
>>> cleared, clear all three.
>>>
>>> - If either of GL_DEPTH_CLAMP_FAR_AMD or GL_DEPTH_CLAMP_NEAR_AMD
>>> changes, change _DepthClamp to DepthClampNear || DepthClampFar.
>>>
>>
>> We only need to handle this case - "Querying DEPTH_CLAMP will return TRUE if 
>> DEPTH_CLAMP_NEAR_AMD _or_
>> DEPTH_CLAMP_FAR_AMD is enabled."
>> I think we don't have to keep changing _DepthClamp, because if we do it
>> then it will enable depth clamping for both the planes and will get 
>> different behavior.
>> Please correct me if I am wrong or missing anything.
>>
>>> - Drivers that enable AMD_depth_clamp_separate will only ever look at
>>> DepthClampNear and DepthClampFar.
>>>
>>> I think that gets all the cases correct with the minimum fuss.  Marek,
>>> what do you think?
>>>
>>>> Marek
>>>> ___
>>>> mesa-dev mailing list
>>>> mesa-dev@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] st/mesa: help fix stencil border color for GL_DEPTH_STENCIL textures

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

GL_STENCIL_INDEX uses GL_INTENSITY for the border color, which is nicer
to hardware that doesn't read the stencil border value from the X channel.

This fixes a bunch of dEQP tests on Vega & Raven.

Cc: 18.1 18.2 
---
 src/mesa/state_tracker/st_atom_sampler.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/state_tracker/st_atom_sampler.c 
b/src/mesa/state_tracker/st_atom_sampler.c
index 289856cd72d..27e4da31581 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -156,20 +156,23 @@ st_convert_sampler(const struct st_context *st,
/* For non-black borders... */
if (/* This is true if wrap modes are using the border color: */
(sampler->wrap_s | sampler->wrap_t | sampler->wrap_r) & 0x1 &&
(msamp->BorderColor.ui[0] ||
 msamp->BorderColor.ui[1] ||
 msamp->BorderColor.ui[2] ||
 msamp->BorderColor.ui[3])) {
   const GLboolean is_integer = texobj->_IsIntegerFormat;
   GLenum texBaseFormat = _mesa_base_tex_image(texobj)->_BaseFormat;
 
+  if (texobj->StencilSampling)
+ texBaseFormat = GL_STENCIL_INDEX;
+
   if (st->apply_texture_swizzle_to_border_color) {
  const struct st_texture_object *stobj = 
st_texture_object_const(texobj);
  /* XXX: clean that up to not use the sampler view at all */
  const struct st_sampler_view *sv = 
st_texture_get_current_sampler_view(st, stobj);
 
  if (sv) {
 struct pipe_sampler_view *view = sv->view;
 union pipe_color_union tmp;
 const unsigned char swz[4] =
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] st/vdpau: silence an uninitialized-variable warning

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/state_trackers/vdpau/decode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c 
b/src/gallium/state_trackers/vdpau/decode.c
index 66d52257717..48dfb0e0003 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -520,21 +520,21 @@ vlVdpDecoderRenderH265(struct pipe_h265_picture_desc 
*picture,
memcpy(picture->RefPicSetLtCurr, picture_info->RefPicSetLtCurr, 8);
picture->UseRefPicList = false;
 
return VDP_STATUS_OK;
 }
 
 static void
 vlVdpDecoderFixVC1Startcode(uint32_t *num_buffers, const void *buffers[], 
unsigned sizes[])
 {
static const uint8_t vc1_startcode[] = { 0x00, 0x00, 0x01, 0x0D };
-   struct vl_vlc vlc;
+   struct vl_vlc vlc = {};
unsigned i;
 
/* search the first 64 bytes for a startcode */
vl_vlc_init(&vlc, *num_buffers, buffers, sizes);
while (vl_vlc_search_byte(&vlc, 64*8, 0x00) && vl_vlc_bits_left(&vlc) >= 
32) {
   uint32_t value = vl_vlc_peekbits(&vlc, 32);
   if (value == 0x0000010D ||
   value == 0x0000010C ||
   value == 0x0000010B)
  return;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] radeonsi: add TGSI_SEMANTIC_CS_USER_DATA for reading up to 4 SGPRs with TGSI

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c| 16 +---
 src/gallium/drivers/radeonsi/si_compute.h|  1 +
 src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
 src/gallium/drivers/radeonsi/si_shader.c | 11 +++
 src/gallium/drivers/radeonsi/si_shader.h |  7 +++
 .../drivers/radeonsi/si_shader_internal.h|  1 +
 6 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index ea6fa3e999d..c5d3d5fcf02 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -121,20 +121,22 @@ static void si_create_compute_state_async(void *job, int 
thread_index)
 &program->active_samplers_and_images);
 
program->shader.selector = &sel;
program->shader.is_monolithic = true;
program->uses_grid_size = sel.info.uses_grid_size;
program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
program->uses_bindless_images = sel.info.uses_bindless_images;
program->reads_variable_block_size =
sel.info.uses_block_size &&
sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
+   program->num_cs_user_data_dwords =
+   sel.info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
 
void *ir_binary = si_get_ir_binary(&sel);
 
/* Try to load the shader from the shader cache. */
mtx_lock(&sscreen->shader_cache_mutex);
 
if (ir_binary &&
si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
mtx_unlock(&sscreen->shader_cache_mutex);
 
@@ -152,21 +154,22 @@ static void si_create_compute_state_async(void *job, int 
thread_index)
 
if (program->ir_type == PIPE_SHADER_IR_TGSI)
FREE(program->ir.tgsi);
program->shader.selector = NULL;
return;
}
 
bool scratch_enabled = shader->config.scratch_bytes_per_wave > 
0;
unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
  (sel.info.uses_grid_size ? 3 : 0) +
- (program->reads_variable_block_size ? 3 : 
0);
+ (program->reads_variable_block_size ? 3 : 
0) +
+ program->num_cs_user_data_dwords;
 
shader->config.rsrc1 =
S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B848_DX10_CLAMP(1) |
S_00B848_FLOAT_MODE(shader->config.float_mode);
 
shader->config.rsrc2 =
S_00B84C_USER_SGPR(user_sgprs) |
S_00B84C_SCRATCH_EN(scratch_enabled) |
@@ -699,30 +702,32 @@ static bool si_upload_compute_input(struct si_context 
*sctx,
radeon_emit(cs, kernel_args_va);
radeon_emit(cs, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) 
|
S_008F04_STRIDE(0));
}
 
r600_resource_reference(&input_buffer, NULL);
 
return true;
 }
 
-static void si_setup_tgsi_grid(struct si_context *sctx,
+static void si_setup_tgsi_user_data(struct si_context *sctx,
 const struct pipe_grid_info *info)
 {
struct si_compute *program = sctx->cs_shader_state.program;
struct radeon_cmdbuf *cs = sctx->gfx_cs;
unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 +
 4 * SI_NUM_RESOURCE_SGPRS;
unsigned block_size_reg = grid_size_reg +
  /* 12 bytes = 3 dwords. */
  12 * program->uses_grid_size;
+   unsigned cs_user_data_reg = block_size_reg +
+   12 * program->reads_variable_block_size;
 
if (info->indirect) {
if (program->uses_grid_size) {
uint64_t base_va = 
r600_resource(info->indirect)->gpu_address;
uint64_t va = base_va + info->indirect_offset;
int i;
 
radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 r600_resource(info->indirect),
 RADEON_USAGE_READ, 
RADEON_PRIO_DRAW_INDIRECT);
@@ -744,20 +749,25 @@ static void si_setup_tgsi_grid(struct si_context *sctx,
radeon_emit(cs, info->grid[1]);
radeon_emit(cs, info->grid[2]);
}
if (program->reads_variable_block_size) {
radeon_set_sh_reg_seq(cs, block_size_reg, 3);
radeon_emit(cs, info->block[0]);

[Mesa-dev] [PATCH 7/7] radeonsi: add a thorough clear/copy_buffer benchmark

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/Makefile.sources |   2 +-
 src/gallium/drivers/radeonsi/meson.build  |   2 +-
 src/gallium/drivers/radeonsi/si_blit.c|   2 +-
 src/gallium/drivers/radeonsi/si_cp_dma.c  |   8 +-
 src/gallium/drivers/radeonsi/si_pipe.c|   8 +-
 src/gallium/drivers/radeonsi/si_pipe.h|   9 +-
 .../drivers/radeonsi/si_shaderlib_tgsi.c  | 102 
 .../drivers/radeonsi/si_test_clearbuffer.c| 139 --
 .../drivers/radeonsi/si_test_dma_perf.c   | 470 ++
 9 files changed, 590 insertions(+), 152 deletions(-)
 delete mode 100644 src/gallium/drivers/radeonsi/si_test_clearbuffer.c
 create mode 100644 src/gallium/drivers/radeonsi/si_test_dma_perf.c

diff --git a/src/gallium/drivers/radeonsi/Makefile.sources 
b/src/gallium/drivers/radeonsi/Makefile.sources
index b52db3a0598..abdc4e07f1e 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -37,22 +37,22 @@ C_SOURCES := \
si_shader_tgsi_setup.c \
si_shaderlib_tgsi.c \
si_state.c \
si_state_binning.c \
si_state_draw.c \
si_state_msaa.c \
si_state_shaders.c \
si_state_streamout.c \
si_state_viewport.c \
si_state.h \
-   si_test_clearbuffer.c \
si_test_dma.c \
+   si_test_dma_perf.c \
si_texture.c \
si_uvd.c \
../radeon/r600_perfcounter.c \
../radeon/radeon_uvd.c \
../radeon/radeon_uvd.h \
../radeon/radeon_vcn_dec.c \
../radeon/radeon_vcn_dec.h \
../radeon/radeon_vcn_enc_1_2.c \
../radeon/radeon_vcn_enc.c \
../radeon/radeon_vcn_enc.h \
diff --git a/src/gallium/drivers/radeonsi/meson.build 
b/src/gallium/drivers/radeonsi/meson.build
index 57229046de1..4d6044f724b 100644
--- a/src/gallium/drivers/radeonsi/meson.build
+++ b/src/gallium/drivers/radeonsi/meson.build
@@ -53,22 +53,22 @@ files_libradeonsi = files(
   'si_shader_tgsi_setup.c',
   'si_shaderlib_tgsi.c',
   'si_state.c',
   'si_state.h',
   'si_state_binning.c',
   'si_state_draw.c',
   'si_state_msaa.c',
   'si_state_shaders.c',
   'si_state_streamout.c',
   'si_state_viewport.c',
-  'si_test_clearbuffer.c',
   'si_test_dma.c',
+  'si_test_dma_perf.c',
   'si_texture.c',
   'si_uvd.c',
   '../radeon/r600_perfcounter.c',
   '../radeon/radeon_uvd.c',
   '../radeon/radeon_uvd.h',
   '../radeon/radeon_vcn_enc_1_2.c',
   '../radeon/radeon_vcn_enc.c',
   '../radeon/radeon_vcn_enc.h',
   '../radeon/radeon_vcn_dec.c',
   '../radeon/radeon_vcn_dec.h',
diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index cf6495291bd..fcaff80125c 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -903,21 +903,21 @@ void si_resource_copy_region(struct pipe_context *ctx,
struct si_context *sctx = (struct si_context *)ctx;
struct si_texture *ssrc = (struct si_texture*)src;
struct pipe_surface *dst_view, dst_templ;
struct pipe_sampler_view src_templ, *src_view;
unsigned dst_width, dst_height, src_width0, src_height0;
unsigned dst_width0, dst_height0, src_force_level = 0;
struct pipe_box sbox, dstbox;
 
/* Handle buffers first. */
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-   si_copy_buffer(sctx, dst, src, dstx, src_box->x, 
src_box->width, 0);
+   si_copy_buffer(sctx, dst, src, dstx, src_box->x, 
src_box->width, 0, -1);
return;
}
 
assert(u_max_sample(dst) == u_max_sample(src));
 
/* The driver doesn't decompress resources automatically while
 * u_blitter is rendering. */
si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level,
  src_box->z, src_box->z + src_box->depth - 1);
 
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 61be22f28b5..486ae75c77f 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -429,32 +429,34 @@ static void si_cp_dma_realign_engine(struct si_context 
*sctx, unsigned size,
 }
 
 /**
  * Do memcpy between buffers using CP DMA.
  *
  * \param user_flags   bitmask of SI_CPDMA_*
  */
 void si_copy_buffer(struct si_context *sctx,
struct pipe_resource *dst, struct pipe_resource *src,
uint64_t dst_offset, uint64_t src_offset, unsigned size,
-   unsigned user_flags)
+   unsigned user_flags, enum si_cache_policy cache_policy)
 {
uint64_t main_dst_offset, main_src_offset;
unsigned skipped_size = 0;
unsigned realign_size = 0;
enum si_coherency coher = SI_COHERENCY_SHADER;
-   enum si_cache_policy cache_policy = get_cache_policy(sctx, coher);
bool is_first = true;
 
if (!size)

[Mesa-dev] [PATCH 3/7] radeonsi: add SI_QUERY_TIME_ELAPSED_SDMA for measuring SDMA performance

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/sid.h |  4 
 src/gallium/drivers/radeonsi/si_dma_cs.c | 29 
 src/gallium/drivers/radeonsi/si_pipe.h   |  2 ++
 src/gallium/drivers/radeonsi/si_query.c  | 21 +++--
 src/gallium/drivers/radeonsi/si_query.h  |  1 +
 5 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index d9c4a1a7414..d696c01d4dd 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -9133,20 +9133,24 @@
 #define        CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW  0x5
 #define        CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW    0x6
 #define    CIK_SDMA_OPCODE_WRITE   0x2
 #define        SDMA_WRITE_SUB_OPCODE_LINEAR   0x0
 #define        SDMA_WRTIE_SUB_OPCODE_TILED    0x1
 #define    CIK_SDMA_OPCODE_INDIRECT_BUFFER 0x4
 #define    CIK_SDMA_PACKET_FENCE   0x5
 #define    CIK_SDMA_PACKET_TRAP    0x6
 #define    CIK_SDMA_PACKET_SEMAPHORE   0x7
 #define    CIK_SDMA_PACKET_CONSTANT_FILL   0xb
+#define    CIK_SDMA_OPCODE_TIMESTAMP   0xd
+#define        SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP 0x0
+#define        SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP 0x1
+#define        SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP    0x2
 #define    CIK_SDMA_PACKET_SRBM_WRITE  0xe
 #define    CIK_SDMA_COPY_MAX_SIZE  0x3fffe0
 
 enum amd_cmp_class_flags {
S_NAN = 1 << 0,// Signaling NaN
Q_NAN = 1 << 1,// Quiet NaN
N_INFINITY = 1 << 2,   // Negative infinity
N_NORMAL = 1 << 3, // Negative normal
N_SUBNORMAL = 1 << 4,  // Negative subnormal
N_ZERO = 1 << 5,   // Negative zero
diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c 
b/src/gallium/drivers/radeonsi/si_dma_cs.c
index 3bb769309e3..7db9570af3c 100644
--- a/src/gallium/drivers/radeonsi/si_dma_cs.c
+++ b/src/gallium/drivers/radeonsi/si_dma_cs.c
@@ -16,32 +16,61 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "si_pipe.h"
+#include "sid.h"
 
 static void si_dma_emit_wait_idle(struct si_context *sctx)
 {
struct radeon_cmdbuf *cs = sctx->dma_cs;
 
/* NOP waits for idle. */
if (sctx->chip_class >= CIK)
radeon_emit(cs, 0x00000000); /* NOP */
else
radeon_emit(cs, 0xf0000000); /* NOP */
 }
 
+void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst,
+  uint64_t offset)
+{
+   struct radeon_cmdbuf *cs = sctx->dma_cs;
+   uint64_t va = dst->gpu_address + offset;
+
+   if (sctx->chip_class == SI) {
+   unreachable("SI DMA doesn't support the timestamp packet.");
+   return;
+   }
+
+   /* Mark the buffer range of destination as valid (initialized),
+* so that transfer_map knows it should wait for the GPU when mapping
+* that range. */
+   util_range_add(&dst->valid_buffer_range, offset, offset + 8);
+
+   assert(va % 8 == 0);
+
+   si_need_dma_space(sctx, 4, dst, NULL);
+   si_dma_emit_wait_idle(sctx);
+
+   radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_TIMESTAMP,
+   SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP,
+   0));
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
+}
+
 void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
   struct r600_resource *dst, struct r600_resource *src)
 {
uint64_t vram = ctx->dma_cs->used_vram;
uint64_t gtt = ctx->dma_cs->used_gart;
 
if (dst) {
vram += dst->vram_usage;
gtt += dst->gart_usage;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 95489f09612..4c3f13b84e2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1148,20 +1148,22 @@ void si_log_draw_state(struct si_context *sctx, struct 
u_log_context *log);
 void si_log_compute_state(struct si_context *sctx, struct u_log_context *log);
 void si_init_debug_functions(struct si_context *sctx);
 void si_check_vm_faults(struct si_context *sctx,
struct radeon_saved_cs *saved, enum ring_type ring);
 bool si_replace_shader(unsigned num, struct ac_shader_binary *binary);
 
 /* si_dma.c */
 void si_init_dma_functions(struct si_context 

[Mesa-dev] [PATCH 6/7] radeonsi: let internal compute dispatches tune WAVES_PER_SH

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c | 8 
 src/gallium/drivers/radeonsi/si_pipe.h| 1 +
 2 files changed, 9 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index c5d3d5fcf02..e0c6902fec4 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -777,20 +777,28 @@ static void si_emit_dispatch_packets(struct si_context 
*sctx,
if (sctx->chip_class >= CIK) {
unsigned num_cu_per_se = sscreen->info.num_good_compute_units /
 sscreen->info.max_se;
 
/* Force even distribution on all SIMDs in CU if the workgroup
 * size is 64. This has shown some good improvements if # of CUs
 * per SE is not a multiple of 4.
 */
if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
+
+   compute_resource_limits |= 
S_00B854_WAVES_PER_SH(sctx->cs_max_waves_per_sh);
+   } else {
+   /* SI */
+   if (sctx->cs_max_waves_per_sh) {
+   unsigned limit_div16 = 
DIV_ROUND_UP(sctx->cs_max_waves_per_sh, 16);
+   compute_resource_limits |= 
S_00B854_WAVES_PER_SH_SI(limit_div16);
+   }
}
 
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
  compute_resource_limits);
 
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]));
radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 100d0166f62..fe06064b388 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -818,20 +818,21 @@ struct si_context {
struct si_shader_ctx_state  ps_shader;
struct si_shader_ctx_state  gs_shader;
struct si_shader_ctx_state  vs_shader;
struct si_shader_ctx_state  tcs_shader;
struct si_shader_ctx_state  tes_shader;
struct si_cs_shader_state   cs_shader_state;
 
/* shader information */
struct si_vertex_elements   *vertex_elements;
unsignedsprite_coord_enable;
+   unsignedcs_max_waves_per_sh;
boolflatshade;
booldo_update_shaders;
 
/* vertex buffer descriptors */
uint32_t *vb_descriptors_gpu_list;
struct r600_resource *vb_descriptors_buffer;
unsigned vb_descriptors_offset;
 
/* shader descriptors */
struct si_descriptors   descriptors[SI_NUM_DESCS];
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] gallium: add TGSI_MEMORY_STREAM_CACHE_POLICY

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

For internal radeonsi shaders.
---
 src/gallium/auxiliary/tgsi/tgsi_strings.c | 3 ++-
 src/gallium/auxiliary/tgsi/tgsi_strings.h | 2 +-
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 3 +++
 src/gallium/include/pipe/p_shader_tokens.h| 8 ++--
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c 
b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index 434871273f2..03261ed2d99 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -221,25 +221,26 @@ const char *tgsi_fs_coord_pixel_center_names[2] =
 const char *tgsi_immediate_type_names[6] =
 {
"FLT32",
"UINT32",
"INT32",
"FLT64",
"UINT64",
"INT64",
 };
 
-const char *tgsi_memory_names[3] =
+const char *tgsi_memory_names[4] =
 {
"COHERENT",
"RESTRICT",
"VOLATILE",
+   "STREAM_CACHE_POLICY",
 };
 
 
 static inline void
 tgsi_strings_check(void)
 {
STATIC_ASSERT(ARRAY_SIZE(tgsi_semantic_names) == TGSI_SEMANTIC_COUNT);
STATIC_ASSERT(ARRAY_SIZE(tgsi_texture_names) == TGSI_TEXTURE_COUNT);
STATIC_ASSERT(ARRAY_SIZE(tgsi_property_names) == TGSI_PROPERTY_COUNT);
STATIC_ASSERT(ARRAY_SIZE(tgsi_primitive_names) == PIPE_PRIM_MAX);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.h 
b/src/gallium/auxiliary/tgsi/tgsi_strings.h
index 20e3f7127f6..61068d562d8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.h
@@ -55,21 +55,21 @@ extern const char 
*tgsi_interpolate_locations[TGSI_INTERPOLATE_LOC_COUNT];
 extern const char *tgsi_invariant_name;
 
 extern const char *tgsi_primitive_names[PIPE_PRIM_MAX];
 
 extern const char *tgsi_fs_coord_origin_names[2];
 
 extern const char *tgsi_fs_coord_pixel_center_names[2];
 
 extern const char *tgsi_immediate_type_names[6];
 
-extern const char *tgsi_memory_names[3];
+extern const char *tgsi_memory_names[4];
 
 
 const char *
 tgsi_file_name(unsigned file);
 
 
 #if defined __cplusplus
 }
 #endif
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 8e0578b4d5e..eaa200a95d6 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -324,20 +324,23 @@ static unsigned get_cache_policy(struct si_shader_context 
*ctx,
 * The only way to get unaligned stores in radeonsi is through
 * shader images. */
((may_store_unaligned && ctx->screen->info.chip_class == SI) ||
 /* If this is write-only, don't keep data in L1 to prevent
  * evicting L1 cache lines that may be needed by other
  * instructions. */
 writeonly_memory ||
 inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | 
TGSI_MEMORY_VOLATILE)))
cache_policy |= ac_glc;
 
+   if (inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
+   cache_policy |= ac_slc;
+
return cache_policy;
 }
 
 static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
const struct tgsi_full_instruction *inst,
LLVMTypeRef type, int arg)
 {
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef offset, ptr;
int addr_space;
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index 08ed08156e7..bef826f23b5 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -783,30 +783,34 @@ struct tgsi_dst_register
unsigned WriteMask   : 4;  /* TGSI_WRITEMASK_ */
unsigned Indirect: 1;  /* BOOL */
unsigned Dimension   : 1;  /* BOOL */
int  Index   : 16; /* SINT */
unsigned Padding : 6;
 };
 
 #define TGSI_MEMORY_COHERENT (1 << 0)
 #define TGSI_MEMORY_RESTRICT (1 << 1)
 #define TGSI_MEMORY_VOLATILE (1 << 2)
+/* The "stream" cache policy will minimize memory cache usage if other
+ * memory operations need the cache.
+ */
+#define TGSI_MEMORY_STREAM_CACHE_POLICY (1 << 3)
 
 /**
  * Specifies the type of memory access to do for the LOAD/STORE instruction.
  */
 struct tgsi_instruction_memory
 {
-   unsigned Qualifier : 3;  /* TGSI_MEMORY_ */
+   unsigned Qualifier : 4;  /* TGSI_MEMORY_ */
unsigned Texture   : 8;  /* only for images: TGSI_TEXTURE_ */
unsigned Format: 10; /* only for images: PIPE_FORMAT_ */
-   unsigned Padding   : 11;
+   unsigned Padding   : 10;
 };
 
 #define TGSI_MEMBAR_SHADER_BUFFER (1 << 0)
 #define TGSI_MEMBAR_ATOMIC_BUFFER (1 << 1)
 #define TGSI_MEMBAR_SHADER_IMAGE  (1 << 2)
 #define TGSI_MEMBAR_SHARED(1 << 3)
 #define TGSI_MEMBAR_THREAD_GROUP  (1 << 4)
 
 #ifdef __cplusplus
 }
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH 2/7] radeonsi: add flag L2_STREAM for minimal cache usage

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/sid.h |  2 ++
 src/gallium/drivers/radeonsi/si_cp_dma.c | 16 ++--
 src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 0671f7d3998..d9c4a1a7414 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -287,25 +287,27 @@
  * 5. DST_ADDR_HI [31:0]
  * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
  */
 #define   R_500_DMA_DATA_WORD0 0x500 /* 0x[packet number][word index] 
*/
 #define S_500_CP_SYNC(x)   (((unsigned)(x) & 0x1) << 31)
 #define S_500_SRC_SEL(x)   (((unsigned)(x) & 0x3) << 29)
 #define   V_500_SRC_ADDR   0
 #define   V_500_GDS1 /* program SAS to 1 as well */
 #define   V_500_DATA   2
 #define   V_500_SRC_ADDR_TC_L2 3 /* new for CIK */
+#define S_500_DST_CACHE_POLICY(x)  (((unsigned)(x) & 0x3) << 25) /* CIK+ */
 #define S_500_DST_SEL(x)   (((unsigned)(x) & 0x3) << 20)
 #define   V_500_DST_ADDR   0
 #define   V_500_GDS1 /* program DAS to 1 as well */
 #define   V_500_NOWHERE2 /* new for GFX9 */
 #define   V_500_DST_ADDR_TC_L2 3 /* new for CIK */
+#define S_500_SRC_CACHE_POLICY(x)  (((unsigned)(x) & 0x3) << 13) /* CIK+ */
 #define S_500_ENGINE(x)((x) & 0x1)
 #define   V_500_ME 0
 #define   V_500_PFP1
 #define   R_501_SRC_ADDR_LO0x501
 #define   R_502_SRC_ADDR_HI0x502
 #define   R_503_DST_ADDR_LO0x503
 #define   R_504_DST_ADDR_HI0x504
 
 #define R_000E4C_SRBM_STATUS2   
0x000E4C
 #define   S_000E4C_SDMA_RQ_PENDING(x) 
(((unsigned)(x) & 0x1) << 0)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index bae592a4f7d..61be22f28b5 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -81,29 +81,33 @@ static void si_emit_cp_dma(struct si_context *sctx, 
uint64_t dst_va,
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
else
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
}
 
if (flags & CP_DMA_RAW_WAIT)
command |= S_414_RAW_WAIT(1);
 
/* Src and dst flags. */
if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) &&
-   src_va == dst_va)
+   src_va == dst_va) {
header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
-   else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS)
-   header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
+   } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+   header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2) |
+ S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
+   }
 
-   if (flags & CP_DMA_CLEAR)
+   if (flags & CP_DMA_CLEAR) {
header |= S_411_SRC_SEL(V_411_DATA);
-   else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS)
-   header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
+   } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+   header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
+ S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
+   }
 
if (sctx->chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
radeon_emit(cs, header);
radeon_emit(cs, src_va);/* SRC_ADDR_LO [31:0] */
radeon_emit(cs, src_va >> 32);  /* SRC_ADDR_HI [31:0] */
radeon_emit(cs, dst_va);/* DST_ADDR_LO [31:0] */
radeon_emit(cs, dst_va >> 32);  /* DST_ADDR_HI [31:0] */
radeon_emit(cs, command);
} else {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 5fa8c33f6cb..95489f09612 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1104,20 +1104,21 @@ void si_init_clear_functions(struct si_context *sctx);
 #define SI_CPDMA_SKIP_BO_LIST_UPDATE   (1 << 4) /* don't update the BO list */
 #define SI_CPDMA_SKIP_ALL (SI_CPDMA_SKIP_CHECK_CS_SPACE | \
   SI_CPDMA_SKIP_SYNC_AFTER | \
   SI_CPDMA_SKIP_SYNC_BEFORE | \
   SI_CPDMA_SKIP_GFX_SYNC | \
   SI_CPDMA_SKIP_BO_LIST_UPDATE)
 
 enum si_cache_policy {
L2_BYPASS,
L2_LRU,/* same as SLC=0 */
+   L2_STREAM, /* same as SLC=1 */
 };
 
 enum si_coherency {
SI_COHERENCY_NONE, /* no cache flushes needed */
SI_COHERENCY_SHADER,

[Mesa-dev] [PATCH 4/7] radeonsi: add SI_QUERY_TIME_ELAPSED_SDMA_SI for measuring DMA on SI

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

DMA on SI doesn't support the timestamp packet, so it's emulated.
---
 src/gallium/drivers/radeonsi/si_query.c | 19 +++
 src/gallium/drivers/radeonsi/si_query.h |  1 +
 2 files changed, 20 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_query.c 
b/src/gallium/drivers/radeonsi/si_query.c
index 93efbd4ef4a..80e84c23937 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -85,30 +85,46 @@ static enum radeon_value_id winsys_id_from_type(unsigned 
type)
case SI_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
case SI_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
case SI_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
case SI_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
case SI_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
case SI_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
default: unreachable("query type does not correspond to winsys id");
}
 }
 
+static int64_t si_finish_dma_get_cpu_time(struct si_context *sctx)
+{
+   struct pipe_fence_handle *fence = NULL;
+
+   si_flush_dma_cs(sctx, 0, &fence);
+   if (fence) {
+   sctx->ws->fence_wait(sctx->ws, fence, PIPE_TIMEOUT_INFINITE);
+   sctx->ws->fence_reference(&fence, NULL);
+   }
+
+   return os_time_get_nano();
+}
+
 static bool si_query_sw_begin(struct si_context *sctx,
  struct si_query *rquery)
 {
struct si_query_sw *query = (struct si_query_sw *)rquery;
enum radeon_value_id ws_id;
 
switch(query->b.type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_GPU_FINISHED:
break;
+   case SI_QUERY_TIME_ELAPSED_SDMA_SI:
+   query->begin_result = si_finish_dma_get_cpu_time(sctx);
+   break;
case SI_QUERY_DRAW_CALLS:
query->begin_result = sctx->num_draw_calls;
break;
case SI_QUERY_DECOMPRESS_CALLS:
query->begin_result = sctx->num_decompress_calls;
break;
case SI_QUERY_MRT_DRAW_CALLS:
query->begin_result = sctx->num_mrt_draw_calls;
break;
case SI_QUERY_PRIM_RESTART_CALLS:
@@ -255,20 +271,23 @@ static bool si_query_sw_end(struct si_context *sctx,
 {
struct si_query_sw *query = (struct si_query_sw *)rquery;
enum radeon_value_id ws_id;
 
switch(query->b.type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
break;
case PIPE_QUERY_GPU_FINISHED:
sctx->b.flush(&sctx->b, &query->fence, PIPE_FLUSH_DEFERRED);
break;
+   case SI_QUERY_TIME_ELAPSED_SDMA_SI:
+   query->end_result = si_finish_dma_get_cpu_time(sctx);
+   break;
case SI_QUERY_DRAW_CALLS:
query->end_result = sctx->num_draw_calls;
break;
case SI_QUERY_DECOMPRESS_CALLS:
query->end_result = sctx->num_decompress_calls;
break;
case SI_QUERY_MRT_DRAW_CALLS:
query->end_result = sctx->num_mrt_draw_calls;
break;
case SI_QUERY_PRIM_RESTART_CALLS:
diff --git a/src/gallium/drivers/radeonsi/si_query.h 
b/src/gallium/drivers/radeonsi/si_query.h
index bc3eb397bc5..cf2eccd862b 100644
--- a/src/gallium/drivers/radeonsi/si_query.h
+++ b/src/gallium/drivers/radeonsi/si_query.h
@@ -103,20 +103,21 @@ enum {
SI_QUERY_NUM_COMPILATIONS,
SI_QUERY_NUM_SHADERS_CREATED,
SI_QUERY_BACK_BUFFER_PS_DRAW_RATIO,
SI_QUERY_NUM_SHADER_CACHE_HITS,
SI_QUERY_GPIN_ASIC_ID,
SI_QUERY_GPIN_NUM_SIMD,
SI_QUERY_GPIN_NUM_RB,
SI_QUERY_GPIN_NUM_SPI,
SI_QUERY_GPIN_NUM_SE,
SI_QUERY_TIME_ELAPSED_SDMA,
+   SI_QUERY_TIME_ELAPSED_SDMA_SI, /* emulated, measured on the CPU */
 
SI_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
 };
 
 enum {
SI_QUERY_GROUP_GPIN = 0,
SI_NUM_SW_QUERY_GROUPS
 };
 
 struct si_query_ops {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] gallium/auxiliary: Add util_format_get_depth_only() helper.

2018-08-20 Thread Eric Anholt
Kenneth Graunke  writes:

> This will be used by u_transfer_helper.c shortly, in order to split
> packed depth-stencil into separate resources.
> ---
>  src/gallium/auxiliary/util/u_format.h | 21 +
>  1 file changed, 21 insertions(+)
>
> diff --git a/src/gallium/auxiliary/util/u_format.h 
> b/src/gallium/auxiliary/util/u_format.h
> index f421222f854..e66849c16b1 100644
> --- a/src/gallium/auxiliary/util/u_format.h
> +++ b/src/gallium/auxiliary/util/u_format.h
> @@ -557,6 +557,27 @@ util_format_is_depth_and_stencil(enum pipe_format format)
>util_format_has_stencil(desc);
>  }
>  
> +/**
> + * For depth-stencil formats, return the equivalent depth-only format.
> + */
> +static inline boolean
> +util_format_get_depth_only(enum pipe_format format)

Given that the equivalent function for stencil is
util_format_stencil_only(), can we get naming consistency?  Other than
that, r-b.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 07/11] intel/nir: Use the new structure and array splitting passes

2018-08-20 Thread Caio Marcelo de Oliveira Filho
On Sat, Jul 28, 2018 at 10:44:38PM -0700, Jason Ekstrand wrote:
> Shader-db results on Kaby Lake:
> 
> total instructions in shared programs: 15177605 -> 15177605 (0.00%)
> instructions in affected programs: 0 -> 0
> helped: 0
> HURT: 0
> 
> This is unsurprising because nir_lower_vars_to_ssa already effectively
> does structure and array splitting internally.  It doesn't actually
> split the variables but its ability to reason about aliasing in the
> presence of arrays and structures and pick out scalars or vectors to be
> lowered to SSA values is fairly advanced.
> ---
>  src/intel/compiler/brw_nir.c | 2 ++
>  1 file changed, 2 insertions(+)

This patch is

Reviewed-by: Caio Marcelo de Oliveira Filho 


> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index 5990427b731..96ad77c3906 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -541,6 +541,7 @@ brw_nir_optimize(nir_shader *nir, const struct 
> brw_compiler *compiler,
> bool progress;
> do {
>progress = false;
> +  OPT(nir_split_array_vars, nir_var_local);
>OPT(nir_lower_vars_to_ssa);
>OPT(nir_opt_copy_prop_vars);

I'm guessing we do this multiple times (instead of only once like
struct) because other optimizations might turn indirects into directs,
allowing more splitting.  If that's the case maybe worth a note in the
commit message.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl/linker: Allow unused in blocks which are not declared on previous stage

2018-08-20 Thread Timothy Arceri

On 20/08/18 23:31, vadym.shovkoplias wrote:

 From Section 4.3.4 (Inputs) of the GLSL 1.50 spec:

 "Only the input variables that are actually read need to be written
  by the previous stage; it is allowed to have superfluous
  declarations of input variables."

Fixes:
 * interstage-multiple-shader-objects.shader_test

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101247
Signed-off-by: Vadym Shovkoplias 
---
  src/compiler/glsl/link_interface_blocks.cpp | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/link_interface_blocks.cpp 
b/src/compiler/glsl/link_interface_blocks.cpp
index e5eca9460e..801fbcd5d9 100644
--- a/src/compiler/glsl/link_interface_blocks.cpp
+++ b/src/compiler/glsl/link_interface_blocks.cpp
@@ -417,9 +417,15 @@ validate_interstage_inout_blocks(struct gl_shader_program 
*prog,
 * write to any of the pre-defined outputs (e.g. if the vertex shader
 * does not write to gl_Position, etc), which is allowed and results in
 * undefined behavior.
+   *
+   * From Section 4.3.4 (Inputs) of the GLSL 1.50 spec:
+   *
+   *"Only the input variables that are actually read need to be written
+   * by the previous stage; it is allowed to have superfluous
+   * declarations of input variables."
 */
if (producer_def == NULL &&
-  !is_builtin_gl_in_block(var, consumer->Stage)) {
+  !is_builtin_gl_in_block(var, consumer->Stage) && var->data.used) {


This concerns me a little. As far as I remember 'used' was added to make 
compiler warnings better, but it's not 100% reliable.



   linker_error(prog, "Input block `%s' is not an output of "
"the previous stage\n", 
var->get_interface_type()->name);
   return;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/8] mesa: expose AMD_texture_texture4

2018-08-20 Thread Marek Olšák
On Wed, Aug 15, 2018 at 3:10 PM Ian Romanick  wrote:
>
> On 08/08/2018 07:12 PM, Marek Olšák wrote:
> > From: Marek Olšák 
> >
> > because the closed driver exposes it.
>
> Aside from AMD_texture_texture4 being really, really under-specified,
> there is one big difference between the two extensions.
>
> 7)  Can both texture *AND* texture4 built-in functions
> sample from the same sampler in a shader?
>
> No.
>
> vs.
>
> (5) Can both texture *AND* textureGather built-in functions
> sample from the same sampler in a shader?
>
>RESOLVED: Yes.
>
> Of course, the AMD_texture_texture4 spec doesn't say what "no" means.
> Compile error?  Garbage results?  A kitten dies?  Zero guidance from the
> spec.  We should imitate whatever the closed driver does... unless a
> kitten dies.  Don't do that.

The closed driver handles it exactly the same as ARB_texture_gather,
which is functionally equivalent to this patch. There is no compile error.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4] anv: add VK_EXT_sampler_filter_minmax support

2018-08-20 Thread He, Yunchao
Thanks for your review, Lionel. 
With the help from Clayton Craft to run the CI, this patch can pass CI: 
http://otc-mesa-ci.jf.intel.com/job/Richard_Yunchao/1/ 

Regards
Yunchao

-Original Message-
From: Landwerlin, Lionel G 
Sent: Monday, August 20, 2018 11:11 AM
To: He, Yunchao ; mesa-dev@lists.freedesktop.org
Subject: Re: [Mesa-dev] [PATCH v4] anv: add VK_EXT_sampler_filter_minmax support

On 20/08/2018 17:29, Yunchao He wrote:
> This extension can be supported on SKL+. With this patch, all 
> corresponding tests (6K+) in CTS can pass. No test fails.
>
> I verified CTS with the command below:
> deqp-vk --deqp-case=dEQP-VK.pipeline.sampler.view_type.*reduce*
>
> v2: 1) support all depth formats, not depth-only formats, 2) fix a 
> wrong indention (Jason).
>
> v3: fix a few nits (Lionel).
>
> v4: fix failures in CI: disable sampler reduction when sampler 
> reduction mode is not specified via this extension (Lionel).
Looks good to me :

Reviewed-by: Lionel Landwerlin 


> ---
>   src/intel/vulkan/anv_device.c  |  8 
>   src/intel/vulkan/anv_extensions.py |  1 +
>   src/intel/vulkan/anv_formats.c |  6 ++
>   src/intel/vulkan/genX_state.c  | 28 
>   4 files changed, 43 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 04fd6a829e..e45ba4b3af 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1116,6 +1116,14 @@ void anv_GetPhysicalDeviceProperties2(
>break;
> }
>   
> +  case 
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
> + VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
> +(VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
> + properties->filterMinmaxImageComponentMapping = pdevice->info.gen 
> >= 9;
> + properties->filterMinmaxSingleComponentFormats = true;
> + break;
> +  }
> +
> case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
>VkPhysicalDeviceSubgroupProperties *properties = (void 
> *)ext;
>   
> diff --git a/src/intel/vulkan/anv_extensions.py 
> b/src/intel/vulkan/anv_extensions.py
> index ea837744b4..e165bd371d 100644
> --- a/src/intel/vulkan/anv_extensions.py
> +++ b/src/intel/vulkan/anv_extensions.py
> @@ -125,6 +125,7 @@ EXTENSIONS = [
>   Extension('VK_EXT_shader_stencil_export', 1, 
> 'device->info.gen >= 9'),
>   Extension('VK_EXT_vertex_attribute_divisor',  2, True),
>   Extension('VK_EXT_post_depth_coverage',   1, 
> 'device->info.gen >= 9'),
> +Extension('VK_EXT_sampler_filter_minmax', 1, 
> 'device->info.gen >= 9'),
>   ]
>   
>   class VkVersion:
> diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
> index 815b320a82..33faf7cc37 100644
> --- a/src/intel/vulkan/anv_formats.c
> +++ b/src/intel/vulkan/anv_formats.c
> @@ -489,6 +489,9 @@ get_image_format_features(const struct gen_device_info 
> *devinfo,
> if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT || devinfo->gen >= 8)
>flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
>   
> +  if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && devinfo->gen >= 9)
> + flags |= 
> + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT;
> +
> flags |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
>  VK_FORMAT_FEATURE_BLIT_DST_BIT |
>  VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
> @@ -521,6 +524,9 @@ get_image_format_features(const struct gen_device_info *devinfo,
>  if (isl_format_supports_sampling(devinfo, plane_format.isl_format)) {
> flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
>   
> +  if (devinfo->gen >= 9)
> + flags |= 
> + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT;
> +
> if (isl_format_supports_filtering(devinfo, plane_format.isl_format))
>flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
>  }
> diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
> index b1014d9e79..2627851ed9 100644
> --- a/src/intel/vulkan/genX_state.c
> +++ b/src/intel/vulkan/genX_state.c
> @@ -245,6 +245,14 @@ static const uint32_t vk_to_gen_shadow_compare_op[] = {
>  [VK_COMPARE_OP_ALWAYS]   = PREFILTEROPNEVER,
>   };
>   
> +#if GEN_GEN >= 9
> +static const uint32_t vk_to_gen_sampler_reduction_mode[] = {
> +   [VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT] = STD_FILTER,
> +   [VK_SAMPLER_REDUCTION_MODE_MIN_EXT]  = MINIMUM,
> +   [VK_SAMPLER_REDUCTION_MODE_MAX_EXT]  = MAXIMUM,
> +};
> +#endif
> +
>   VkResult genX(CreateSampler)(
>   VkDevice_device,
>   const VkSamplerCreateInfo*  pCreateInfo,
> @@ -266,6 +274,11 @@ VkResult genX(CreateSampler)(
>  uint32_t border_color_offset = device->border_colors.offset +
> 

[Mesa-dev] [Bug 99014] clover is broken for Oland in 12.0.4

2018-08-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=99014

Timothy Arceri  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |WONTFIX

--- Comment #11 from Timothy Arceri  ---
Support for mesa 12.0 ended long ago and the bug report says the issue was gone in
13.0, so marking as won't fix.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/9] radeonsi: use ac_build_imad

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c  | 54 ++-
 .../drivers/radeonsi/si_shader_tgsi_mem.c | 18 +++
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 14 ++---
 3 files changed, 29 insertions(+), 57 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 29523474735..24ee45f578a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -371,38 +371,32 @@ get_tcs_in_current_patch_offset(struct si_shader_context 
*ctx)
return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");
 }
 
 static LLVMValueRef
 get_tcs_out_current_patch_offset(struct si_shader_context *ctx)
 {
LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
 
-   return LLVMBuildAdd(ctx->ac.builder, patch0_offset,
-   LLVMBuildMul(ctx->ac.builder, patch_stride,
-rel_patch_id, ""),
-   "");
+   return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, 
patch0_offset);
 }
 
 static LLVMValueRef
 get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
 {
LLVMValueRef patch0_patch_data_offset =
get_tcs_out_patch0_patch_data_offset(ctx);
LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
 
-   return LLVMBuildAdd(ctx->ac.builder, patch0_patch_data_offset,
-   LLVMBuildMul(ctx->ac.builder, patch_stride,
-rel_patch_id, ""),
-   "");
+   return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, 
patch0_patch_data_offset);
 }
 
 static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
 {
unsigned tcs_out_vertices =
ctx->shader->selector ?

ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0;
 
/* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS 
epilog. */
if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
@@ -808,26 +802,22 @@ LLVMValueRef si_get_indirect_index(struct 
si_shader_context *ctx,
/* Set the second index to 0 for constants. */
if (ind->File == TGSI_FILE_CONSTANT)
src.Register.Dimension = 1;
 
result = 
ctx->bld_base.emit_fetch_funcs[ind->File](&ctx->bld_base, &src,
   
TGSI_TYPE_SIGNED,
   
ind->Swizzle);
result = ac_to_integer(&ctx->ac, result);
}
 
-   if (addr_mul != 1)
-   result = LLVMBuildMul(ctx->ac.builder, result,
- LLVMConstInt(ctx->i32, addr_mul, 0), "");
-   result = LLVMBuildAdd(ctx->ac.builder, result,
- LLVMConstInt(ctx->i32, rel_index, 0), "");
-   return result;
+   return ac_build_imad(&ctx->ac, result, LLVMConstInt(ctx->i32, addr_mul, 
0),
+LLVMConstInt(ctx->i32, rel_index, 0));
 }
 
 /**
  * Like si_get_indirect_index, but restricts the return value to a (possibly
  * undefined) value inside [0..num).
  */
 LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
   const struct tgsi_ind_register *ind,
   int rel_index, unsigned num)
 {
@@ -840,29 +830,27 @@ static LLVMValueRef 
get_dw_address_from_generic_indices(struct si_shader_context
LLVMValueRef 
vertex_dw_stride,
LLVMValueRef base_addr,
LLVMValueRef 
vertex_index,
LLVMValueRef 
param_index,
unsigned input_index,
ubyte *name,
ubyte *index,
bool is_patch)
 {
if (vertex_dw_stride) {
-   base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
-LLVMBuildMul(ctx->ac.builder, 
vertex_index,
- vertex_dw_stride, ""), 
"");
+   base_addr = ac_build_imad(&ctx->ac, vertex_index,
+ vertex_dw_stride, base_addr);
}
 
if (param_index) {
-   base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
-  

[Mesa-dev] [PATCH 1/9] ac: completely remove +auto-waitcnt-before-barrier

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

it causes corruption on several different GPU generations.

Cc: 18.2 
---
 src/amd/common/ac_llvm_util.c  | 7 ++-
 src/amd/common/ac_llvm_util.h  | 1 -
 src/gallium/drivers/radeonsi/si_pipe.c | 1 -
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 42bc538b4d7..cd3525187a0 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -142,30 +142,27 @@ const char *ac_get_llvm_processor_name(enum radeon_family 
family)
 
 static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
 enum 
ac_target_machine_options tm_options,
 LLVMCodeGenOptLevel level,
 const char **out_triple)
 {
assert(family >= CHIP_TAHITI);
char features[256];
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? 
"amdgcn-mesa-mesa3d" : "amdgcn--";
LLVMTargetRef target = ac_get_llvm_target(triple);
-   bool barrier_does_waitcnt = (tm_options & 
AC_TM_AUTO_WAITCNT_BEFORE_BARRIER) &&
-   family != CHIP_VEGA20;
 
snprintf(features, sizeof(features),
-
"+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s%s",
+
"+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s",
 tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
 tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
 tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
-tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? 
",-promote-alloca" : "",
-barrier_does_waitcnt ? ",+auto-waitcnt-before-barrier" : "");
+tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? 
",-promote-alloca" : "");

LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
 target,
 triple,
 ac_get_llvm_processor_name(family),
 features,
 level,
 LLVMRelocDefault,
 LLVMCodeModelDefault);
 
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index e252bed3bb6..eaf5f21876b 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -58,21 +58,20 @@ enum ac_func_attr {
 
 enum ac_target_machine_options {
AC_TM_SUPPORTS_SPILL = (1 << 0),
AC_TM_SISCHED = (1 << 1),
AC_TM_FORCE_ENABLE_XNACK = (1 << 2),
AC_TM_FORCE_DISABLE_XNACK = (1 << 3),
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
AC_TM_CHECK_IR = (1 << 5),
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
AC_TM_CREATE_LOW_OPT = (1 << 7),
-   AC_TM_AUTO_WAITCNT_BEFORE_BARRIER = (1 << 8),
 };
 
 enum ac_float_mode {
AC_FLOAT_MODE_DEFAULT,
AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
AC_FLOAT_MODE_UNSAFE_FP_MATH,
 };
 
 /* Per-thread persistent LLVM objects. */
 struct ac_llvm_compiler {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 4327a3f749b..13fcf1f3aea 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -108,21 +108,20 @@ static const struct debug_named_value debug_options[] = {
 
 static void si_init_compiler(struct si_screen *sscreen,
 struct ac_llvm_compiler *compiler)
 {
/* Only create the less-optimizing version of the compiler on APUs
 * predating Ryzen (Raven). */
bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram &&
   sscreen->info.chip_class <= VI;
 
enum ac_target_machine_options tm_options =
-   AC_TM_AUTO_WAITCNT_BEFORE_BARRIER |
(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
(sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 
0) |
(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 
0) |
(sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 
0) |
(!sscreen->llvm_has_working_vgpr_indexing ? 
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) |
(sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) |
(create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
 
ac_init_llvm_once();
ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/9] ac,radeonsi: use ac_build_fmad

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_llvm_build.c   |  3 +--
 src/amd/common/ac_nir_to_llvm.c  |  7 ++-
 src/gallium/drivers/radeonsi/si_shader.c | 17 +
 3 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 6d5bfb1a1be..c741a1ab62d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -755,22 +755,21 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 
/* Shift the texture coordinate. This must be applied after the
 * derivative calculation.
 */
for (int i = 0; i < 2; ++i)
coords[i] = LLVMBuildFAdd(builder, coords[i], 
LLVMConstReal(ctx->f32, 1.5), "");
 
if (is_array) {
/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
/* coords_arg.w component - array_index for cube arrays */
-   LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], 
LLVMConstReal(ctx->f32, 8.0), "");
-   coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
+   coords[2] = ac_build_fmad(ctx, coords_arg[3], 
LLVMConstReal(ctx->f32, 8.0), coords[2]);
}
 
memcpy(coords_arg, coords, sizeof(coords));
 }
 
 
 LLVMValueRef
 ac_build_fs_interp(struct ac_llvm_context *ctx,
   LLVMValueRef llvm_chan,
   LLVMValueRef attr_number,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 1584fef7ab7..537ac33c044 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2846,25 +2846,22 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
  ddxy_out, 
ix_ll, "");
LLVMValueRef ddy_el = 
LLVMBuildExtractElement(ctx->ac.builder,
  ddxy_out, 
iy_ll, "");
LLVMValueRef interp_el = 
LLVMBuildExtractElement(ctx->ac.builder,
 
interp_param, ix_ll, "");
LLVMValueRef temp1, temp2;
 
interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el,
 ctx->ac.f32, "");
 
-   temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, src_c0, 
"");
-   temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, 
interp_el, "");
-
-   temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, src_c1, 
"");
-   temp2 = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, 
"");
+   temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, 
interp_el);
+   temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);
 
ij_out[i] = LLVMBuildBitCast(ctx->ac.builder,
 temp2, ctx->ac.i32, "");
}
interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
 
}
 
for (chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 24ee45f578a..66fe5fad218 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2652,24 +2652,22 @@ static void si_llvm_emit_clipvertex(struct 
si_shader_context *ctx,
args->out[3] = LLVMConstReal(ctx->f32, 0.0f);
 
/* Compute dot products of position and user clip plane vectors 
*/
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; 
const_chan++) {
LLVMValueRef addr =
LLVMConstInt(ctx->i32, ((reg_index * 4 
+ chan) * 4 +
const_chan) * 
4, 0);
base_elt = buffer_load_const(ctx, 
const_resource,
 addr);
-   args->out[chan] =
-   LLVMBuildFAdd(ctx->ac.builder, 
args->out[chan],
- 
LLVMBuildFMul(ctx->ac.builder, base_elt,
-   
out_elts[const_chan], ""), "");
+   args->out[chan] = ac_build_fmad(&ctx->ac, 
base_elt,
+   
out_elts[const_chan], args->out[chan]);
}
}
 
args->enabled_channels = 0xf;
args->valid_mask = 0;
args->done = 0;
 

[Mesa-dev] [PATCH 9/9] ac, radeonsi: use ac_build_gather_values more

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_nir_to_llvm.c   | 14 +++
 src/gallium/drivers/radeonsi/si_shader.c  |  8 +++---
 .../drivers/radeonsi/si_shader_tgsi_mem.c | 25 +++
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 17 -
 4 files changed, 20 insertions(+), 44 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 537ac33c044..700e48e14b7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -479,35 +479,30 @@ static LLVMValueRef emit_pack_half_2x16(struct 
ac_llvm_context *ctx,
comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
 
return LLVMBuildBitCast(ctx->builder, ac_build_cvt_pkrtz_f16(ctx, comp),
ctx->i32, "");
 }
 
 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
  LLVMValueRef src0)
 {
LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
-   LLVMValueRef temps[2], result, val;
+   LLVMValueRef temps[2], val;
int i;
 
for (i = 0; i < 2; i++) {
val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : 
src0;
val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
}
-
-   result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), 
temps[0],
-   ctx->i32_0, "");
-   result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
-   ctx->i32_1, "");
-   return result;
+   return ac_build_gather_values(ctx, temps, 2);
 }
 
 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
  nir_op op,
  LLVMValueRef src0)
 {
unsigned mask;
int idx;
LLVMValueRef result;
 
@@ -997,24 +992,21 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
ctx->ac.v2i32,
"");
result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
 ctx->ac.i32_1, "");
break;
}
 
case nir_op_pack_64_2x32_split: {
LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
-   tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
-src[0], ctx->ac.i32_0, "");
-   tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
-src[1], ctx->ac.i32_1, "");
+   tmp = ac_build_gather_values(&ctx->ac, src, 2);
result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, 
"");
break;
}
 
case nir_op_cube_face_coord: {
src[0] = ac_to_float(&ctx->ac, src[0]);
LLVMValueRef results[2];
LLVMValueRef in[3];
for (unsigned chan = 0; chan < 3; chan++)
in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 66fe5fad218..cfd99b61601 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2717,26 +2717,24 @@ static void emit_streamout_output(struct 
si_shader_context *ctx,
 
/* Pack the output. */
LLVMValueRef vdata = NULL;
 
switch (num_comps) {
case 1: /* as i32 */
vdata = out[0];
break;
case 2: /* as v2i32 */
case 3: /* as v4i32 (aligned to 4) */
+   out[3] = LLVMGetUndef(ctx->i32);
+   /* fall through */
case 4: /* as v4i32 */
-   vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, 
util_next_power_of_two(num_comps)));
-   for (int j = 0; j < num_comps; j++) {
-   vdata = LLVMBuildInsertElement(ctx->ac.builder, vdata, 
out[j],
-  LLVMConstInt(ctx->i32, 
j, 0), "");
-   }
+   vdata = ac_build_gather_values(&ctx->ac, out, 
util_next_power_of_two(num_comps));
break;
}
 
ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx],
vdata, num_comps,
so_write_offsets[buf_idx],
ctx->i32_0,
stream_out->dst_offset * 4, 1, 1, true, 
false);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 

[Mesa-dev] [PATCH 4/9] radeonsi: print the shader stage name when printing LLVM IR

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 354c05e3d9d..81c825db1e4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6987,21 +6987,22 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
ctx.shader->config.private_mem_vgprs =
ac_count_scratch_private_memory(ctx.main_fn);
}
 
/* Make sure the input is a pointer and not integer followed by 
inttoptr. */
assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) ==
   LLVMPointerTypeKind);
 
/* Compile to bytecode. */
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,
-   ctx.ac.module, debug, ctx.type, "TGSI shader",
+   ctx.ac.module, debug, ctx.type,
+   si_get_shader_name(shader, ctx.type),
si_should_optimize_less(compiler, 
shader->selector));
si_llvm_dispose(&ctx);
if (r) {
fprintf(stderr, "LLVM failed to compile shader\n");
return r;
}
 
/* Validate SGPR and VGPR usage for compute to detect compiler bugs.
 * LLVM 3.9svn has this bug.
 */
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/9] radeonsi/gfx9: fix WAITCNT flags

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader_internal.h | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index ac7784f7d46..6cc503690da 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -282,23 +282,25 @@ LLVMValueRef si_nir_lookup_interp_param(struct 
ac_shader_abi *abi,
enum glsl_interp_mode interp,
unsigned location);
 
 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
const struct tgsi_opcode_info *info,
unsigned index,
LLVMValueRef dst[4]);
 
 /* Combine these with & instead of |. */
-#define NOOP_WAITCNT 0xf7f
-#define LGKM_CNT 0x07f
-#define VM_CNT 0xf70
+#define NOOP_WAITCNT 0xcf7f
+#define LGKM_CNT 0xc07f
+#define EXP_CNT 0xcf0f
+/* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+#define VM_CNT 0x0f70
 
 LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
   const struct tgsi_ind_register *ind,
   unsigned addr_mul, int rel_index);
 LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
   const struct tgsi_ind_register *ind,
   int rel_index, unsigned num);
 LLVMValueRef si_get_sample_id(struct si_shader_context *ctx);
 
 void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/9] radeonsi: use is_merged shader in si_prolog_get_rw_buffers

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

Doing so requires changing the parameter of is_merged_shader from
struct si_shader to struct si_shader_context.
---
 src/gallium/drivers/radeonsi/si_shader.c | 32 +++-
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 551671f4021..354c05e3d9d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -94,29 +94,29 @@ static void si_build_ps_epilog_function(struct 
si_shader_context *ctx,
 
 static bool llvm_type_is_64bit(struct si_shader_context *ctx,
   LLVMTypeRef type)
 {
if (type == ctx->ac.i64 || type == ctx->ac.f64)
return true;
 
return false;
 }
 
-static bool is_merged_shader(struct si_shader *shader)
+static bool is_merged_shader(struct si_shader_context *ctx)
 {
-   if (shader->selector->screen->info.chip_class <= VI)
+   if (ctx->screen->info.chip_class <= VI)
return false;
 
-   return shader->key.as_ls ||
-  shader->key.as_es ||
-  shader->selector->type == PIPE_SHADER_TESS_CTRL ||
-  shader->selector->type == PIPE_SHADER_GEOMETRY;
+   return ctx->shader->key.as_ls ||
+  ctx->shader->key.as_es ||
+  ctx->type == PIPE_SHADER_TESS_CTRL ||
+  ctx->type == PIPE_SHADER_GEOMETRY;
 }
 
 static void si_init_function_info(struct si_function_info *fninfo)
 {
fninfo->num_params = 0;
fninfo->num_sgpr_params = 0;
 }
 
 static unsigned add_arg_assign(struct si_function_info *fninfo,
enum si_arg_regfile regfile, LLVMTypeRef type,
@@ -6573,21 +6573,21 @@ static void si_build_wrapper_function(struct 
si_shader_context *ctx,
assert(ac_is_sgpr_param(param) == (gprs < num_sgprs));
assert(gprs + size <= num_sgprs + num_vgprs &&
   (gprs >= num_sgprs || gprs + size <= num_sgprs));
 
gprs += size;
}
 
si_create_function(ctx, "wrapper", NULL, 0, &fninfo,
   si_get_max_workgroup_size(ctx->shader));
 
-   if (is_merged_shader(ctx->shader))
+   if (is_merged_shader(ctx))
ac_init_exec_full_mask(&ctx->ac);
 
/* Record the arguments of the function as if they were an output of
 * a previous part.
 */
num_out = 0;
num_out_sgpr = 0;
 
for (unsigned i = 0; i < fninfo.num_params; ++i) {
LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
@@ -6631,21 +6631,21 @@ static void si_build_wrapper_function(struct 
si_shader_context *ctx,
/* Now chain the parts. */
for (unsigned part = 0; part < num_parts; ++part) {
LLVMValueRef in[48];
LLVMValueRef ret;
LLVMTypeRef ret_type;
unsigned out_idx = 0;
unsigned num_params = LLVMCountParams(parts[part]);
 
/* Merged shaders are executed conditionally depending
 * on the number of enabled threads passed in the input SGPRs. 
*/
-   if (is_merged_shader(ctx->shader) && part == 0) {
+   if (is_merged_shader(ctx) && part == 0) {
LLVMValueRef ena, count = initial[3];
 
count = LLVMBuildAnd(builder, count,
 LLVMConstInt(ctx->i32, 0x7f, 0), 
"");
ena = LLVMBuildICmp(builder, LLVMIntULT,
ac_get_thread_id(&ctx->ac), count, 
"");
lp_build_if(&if_state, &ctx->gallivm, ena);
}
 
/* Derive arguments for the next part from outputs of the
@@ -6693,21 +6693,21 @@ static void si_build_wrapper_function(struct 
si_shader_context *ctx,
arg = LLVMBuildBitCast(builder, arg, 
param_type, "");
}
}
 
in[param_idx] = arg;
out_idx += param_size;
}
 
ret = LLVMBuildCall(builder, parts[part], in, num_params, "");
 
-   if (is_merged_shader(ctx->shader) &&
+   if (is_merged_shader(ctx) &&
part + 1 == next_shader_first_part) {
lp_build_endif(&if_state);
 
/* The second half of the merged shader should use
 * the inputs from the toplevel (wrapper) function,
 * not the return value from the last call.
 *
 * That's because the last call was executed condi-
 * tionally, so we can't consume it in the main
 * block.
@@ -7027,21 +7027,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
/* Just terminate the process, because dependent

  1   2   >