[Mesa-dev] [PATCH] gallium: add Tegra renderonly support

2017-01-12 Thread Alexandre Courbot
From: Christian Gmeiner 

Based on the same model as the IMX driver, opens a Nouveau render device
in order to transparently provide acceleration on Tegra.

Signed-off-by: Christian Gmeiner 
[acour...@nvidia.com: port to latest branch, minor improvements]
Signed-off-by: Alexandre Courbot 
---
 configure.ac   |  9 ++-
 src/gallium/Makefile.am|  4 ++
 .../auxiliary/pipe-loader/pipe_loader_drm.c|  5 ++
 src/gallium/auxiliary/target-helpers/drm_helper.h  | 22 +++
 .../auxiliary/target-helpers/drm_helper_public.h   |  3 +
 src/gallium/drivers/nouveau/nouveau_buffer.c   |  3 +
 src/gallium/drivers/nouveau/nouveau_buffer.h   |  2 +
 src/gallium/drivers/nouveau/nouveau_screen.h   |  3 +
 src/gallium/drivers/nouveau/nv50/nv50_miptree.c|  3 +
 src/gallium/drivers/nouveau/nvc0/nvc0_resource.c   | 18 +-
 src/gallium/drivers/tegra/Automake.inc |  9 +++
 src/gallium/drivers/tegra/Makefile.am  |  9 +++
 src/gallium/targets/dri/Makefile.am|  2 +
 src/gallium/targets/dri/target.c   | 11 
 .../winsys/nouveau/drm/nouveau_drm_public.h|  2 +
 .../winsys/nouveau/drm/nouveau_drm_winsys.c| 10 +++
 src/gallium/winsys/tegra/drm/Android.mk| 33 ++
 src/gallium/winsys/tegra/drm/Makefile.am   | 34 ++
 src/gallium/winsys/tegra/drm/Makefile.sources  |  3 +
 src/gallium/winsys/tegra/drm/tegra_drm_public.h|  8 +++
 src/gallium/winsys/tegra/drm/tegra_drm_winsys.c| 74 ++
 src/mesa/drivers/dri/nouveau/nouveau_screen.h  |  1 +
 22 files changed, 265 insertions(+), 3 deletions(-)
 create mode 100644 src/gallium/drivers/tegra/Automake.inc
 create mode 100644 src/gallium/drivers/tegra/Makefile.am
 create mode 100644 src/gallium/winsys/tegra/drm/Android.mk
 create mode 100644 src/gallium/winsys/tegra/drm/Makefile.am
 create mode 100644 src/gallium/winsys/tegra/drm/Makefile.sources
 create mode 100644 src/gallium/winsys/tegra/drm/tegra_drm_public.h
 create mode 100644 src/gallium/winsys/tegra/drm/tegra_drm_winsys.c

diff --git a/configure.ac b/configure.ac
index bc92fb5535c1..4b90b65805e3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1234,7 +1234,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
 AC_ARG_WITH([gallium-drivers],
 [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
 [comma delimited Gallium drivers list, e.g.
-
"i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx"
+
"i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx,tegra"
 @<:@default=r300,r600,svga,swrast@:>@])],
 [with_gallium_drivers="$withval"],
 [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -2559,6 +2559,10 @@ if test -n "$with_gallium_drivers"; then
 require_libdrm "virgl"
 require_basic_egl "virgl"
 ;;
+xtegra)
+HAVE_GALLIUM_TEGRA=yes
+require_libdrm "tegra"
+;;
 *)
 AC_MSG_ERROR([Unknown Gallium driver: $driver])
 ;;
@@ -2643,6 +2647,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test 
"x$HAVE_GALLIUM_NOUVEAU" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_ETNAVIV, test "x$HAVE_GALLIUM_ETNAVIV" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_IMX, test "x$HAVE_GALLIUM_IMX" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_TEGRA, test "x$HAVE_GALLIUM_TEGRA" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_SWR, test "x$HAVE_GALLIUM_SWR" = xyes)
@@ -2800,6 +2805,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/drivers/imx/Makefile
src/gallium/drivers/vc4/Makefile
src/gallium/drivers/virgl/Makefile
+   src/gallium/drivers/tegra/Makefile
src/gallium/state_trackers/clover/Makefile
src/gallium/state_trackers/dri/Makefile
src/gallium/state_trackers/glx/xlib/Makefile
@@ -2844,6 +2850,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/winsys/vc4/drm/Makefile
src/gallium/winsys/virgl/drm/Makefile
src/gallium/winsys/virgl/vtest/Makefile
+   src/gallium/winsys/tegra/drm/Makefile
src/gbm/Makefile
src/gbm/main/gbm.pc
src/glx/Makefile
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index f910f3187eb4..0f887d220d5b 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -100,6 +100,10 @@ if HAVE_GALLIUM_VIRGL
 SUBDIRS += drivers/virgl winsys/virgl/drm winsys/virgl/vtest
 endif
 
+if HAVE_GALLIUM_TEGRA
+SUBDIRS += drivers/tegra 

Re: [Mesa-dev] [PATCH v2] radv: remove some unused macros and functions

2017-01-12 Thread Bas Nieuwenhuizen
Pushed, thanks.

On Wed, Jan 11, 2017 at 2:31 AM, Grazvydas Ignotas  wrote:
> These seem unlikely to be used.
> Also remove irrelevant comment about SKL.
>
> v2: forgot to rebase on master
>
> Signed-off-by: Grazvydas Ignotas 
> ---
> no commit access
>
>  src/amd/vulkan/radv_private.h | 15 +--
>  src/amd/vulkan/radv_util.c| 19 ---
>  2 files changed, 1 insertion(+), 33 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index ebda49d..fc3cbca 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -79,8 +79,7 @@ typedef uint32_t xcb_window_t;
>  #define MAX_SCISSORS16
>  #define MAX_PUSH_CONSTANTS_SIZE 128
>  #define MAX_DYNAMIC_BUFFERS 16
> -#define MAX_IMAGES 8
> -#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */
> +#define MAX_SAMPLES_LOG2 4
>  #define NUM_META_FS_KEYS 11
>
>  #define NUM_DEPTH_CLEAR_PIPELINES 3
> @@ -112,7 +111,6 @@ enum {
> RADV_DEBUG_UNSAFE_MATH   =  0x80,
>  };
>
> -#define radv_noreturn __attribute__((__noreturn__))
>  #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
>
>  static inline uint32_t
> @@ -248,9 +246,6 @@ void radv_loge_v(const char *format, va_list va);
>  #define radv_assert(x)
>  #endif
>
> -void radv_abortf(const char *format, ...) radv_noreturn radv_printflike(1, 
> 2);
> -void radv_abortfv(const char *format, va_list va) radv_noreturn;
> -
>  #define stub_return(v) \
> do {\
> radv_finishme("stub %s", __func__); \
> @@ -1328,12 +1323,4 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, 
> VkRenderPass)
>  RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler)
>  RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule)
>
> -#define RADV_DEFINE_STRUCT_CASTS(__radv_type, __VkType)  
>   \
> -   \
> -   static inline const __VkType *  \
> -   __radv_type ## _to_ ## __VkType(const struct __radv_type *__radv_obj) 
> \
> -   {   \
> -   return (const __VkType *) __radv_obj;   \
> -   }
> -
>  #endif /* RADV_PRIVATE_H */
> diff --git a/src/amd/vulkan/radv_util.c b/src/amd/vulkan/radv_util.c
> index 8c7a948..c642bb7 100644
> --- a/src/amd/vulkan/radv_util.c
> +++ b/src/amd/vulkan/radv_util.c
> @@ -65,25 +65,6 @@ void radv_printflike(3, 4)
> fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
>  }
>
> -void radv_noreturn radv_printflike(1, 2)
> -   radv_abortf(const char *format, ...)
> -{
> -   va_list va;
> -
> -   va_start(va, format);
> -   radv_abortfv(format, va);
> -   va_end(va);
> -}
> -
> -void radv_noreturn
> -radv_abortfv(const char *format, va_list va)
> -{
> -   fprintf(stderr, "vk: error: ");
> -   vfprintf(stderr, format, va);
> -   fprintf(stderr, "\n");
> -   abort();
> -}
> -
>  VkResult
>  __vk_errorf(VkResult error, const char *file, int line, const char *format, 
> ...)
>  {
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/22] i965/vec4: split DF instructions and later duplicate its execsize in IVB/VLV

2017-01-12 Thread Samuel Iglesias Gonsálvez
On Thu, 2017-01-12 at 15:39 -0800, Matt Turner wrote:
> On Thu, Jan 5, 2017 at 5:07 AM, Samuel Iglesias Gonsálvez
>  wrote:
> > We need to split DF instructions in two on IVB/VLV as it needs an
> > execsize 8 to process 4 DF values (one GRF in total).
> > 
> > Signed-off-by: Samuel Iglesias Gonsálvez 
> > ---
> >  src/mesa/drivers/dri/i965/brw_ir_vec4.h  |  1 +
> >  src/mesa/drivers/dri/i965/brw_vec4.cpp   | 20
> > +++-
> >  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15
> > +--
> >  3 files changed, 33 insertions(+), 3 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> > b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> > index 57fc6be..9291be0 100644
> > --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> > +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> > @@ -291,6 +291,7 @@ public:
> > bool can_do_writemask(const struct gen_device_info *devinfo);
> > bool can_change_types() const;
> > bool has_source_and_destination_hazard() const;
> > +   unsigned exec_data_size() const;
> > 
> > bool is_align1_partial_write()
> > {
> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > index b5e846d..6ba85d7 100644
> > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > @@ -301,6 +301,19 @@ vec4_instruction::can_change_types() const
> >  !src[1].abs && !src[1].negate));
> >  }
> > 
> > +unsigned
> > +vec4_instruction::exec_data_size() const
> > +{
> > +  unsigned exec_data_size = 0;
> > +
> > +  for (int i = 0; i < 3; i++) {
> > +if (this->src[i].type != BAD_FILE)
> > +  exec_data_size = MAX2(exec_data_size, type_sz(this-
> > >src[i].type));
> 
> Indentation, use braces.
> 
> > +  }
> > +
> > +  return exec_data_size;
> > +}
> 
> I think this should become a static inline function for the reasons
> Curro mentioned in the fs patch.
> 

OK, thanks!

Sam
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] travis: Add the new drivers etnaviv and imx

2017-01-12 Thread Christian Gmeiner
2017-01-13 5:06 GMT+01:00 Rhys Kidd :
> Signed-off-by: Rhys Kidd 
> ---
>  .travis.yml | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/.travis.yml b/.travis.yml
> index 3d107aa..dfbc052 100644
> --- a/.travis.yml
> +++ b/.travis.yml
> @@ -32,7 +32,7 @@ env:
>  - DRI3PROTO_VERSION=dri3proto-1.0
>  - PRESENTPROTO_VERSION=presentproto-1.0
>  - LIBPCIACCESS_VERSION=libpciaccess-0.13.4
> -- LIBDRM_VERSION=libdrm-2.4.65
> +- LIBDRM_VERSION=libdrm-2.4.74
>  - XCBPROTO_VERSION=xcb-proto-1.11
>  - LIBXCB_VERSION=libxcb-1.11
>  - LIBXSHMFENCE_VERSION=libxshmfence-1.2
> @@ -92,7 +92,7 @@ install:
>
>- wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
>- tar -jxvf $LIBDRM_VERSION.tar.bz2
> -  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 && 
> make install)
> +  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 
> --enable-etnaviv-experimental-api && make install)
>
>- wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
>- tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
> @@ -103,7 +103,7 @@ script:
>./autogen.sh --enable-debug
>  --with-egl-platforms=x11,drm
>  --with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
> ---with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600
> +--with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600,etnaviv,imx
>  --disable-llvm-shared-libs
>  ;
>make && make check;
> --
> 2.9.3
>

Reviewed-by: Christian Gmeiner 

greets
--
Christian Gmeiner, MSc

https://www.youtube.com/user/AloryOFFICIAL
https://soundcloud.com/christian-gmeiner
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/22] i965/fs: add helper to retrieve instruction data size

2017-01-12 Thread Samuel Iglesias Gonsálvez
On Thu, 2017-01-12 at 15:14 -0800, Matt Turner wrote:
> On Thu, Jan 5, 2017 at 5:07 AM, Samuel Iglesias Gonsálvez
>  wrote:
> > From: "Juan A. Suarez Romero" 
> > 
> > The execution data size is the biggest type size of any instruction
> > operand.
> > 
> > We will use it to know if the instruction deals with DF, because in
> > Ivy
> > we need to duplicate the execution size and regioning parameters.
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.cpp  | 19 ++-
> >  src/mesa/drivers/dri/i965/brw_ir_fs.h |  1 +
> >  2 files changed, 15 insertions(+), 5 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > index c8a0693..eb3b4aa 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -340,6 +340,19 @@ fs_inst::has_source_and_destination_hazard()
> > const
> > }
> >  }
> > 
> > +unsigned
> > +fs_inst::exec_data_size() const
> > +{
> > +  unsigned exec_data_size = 0;
> > +
> > +  for (int i = 0; i < this->sources; i++) {
> > +if (this->src[i].type != BAD_FILE)
> > +  exec_data_size = MAX2(exec_data_size, type_sz(this-
> > >src[i].type));
> 
> Indentation is wrong (looks like 2 space, but should be 3 space).
> Also
> please use braces in nested control flow.
> 

OK

> > +  }
> > +
> > +  return exec_data_size;
> > +}
> > +
> >  bool
> > >  fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc)
> > const
> >  {
> > @@ -4577,11 +4590,7 @@ get_fpu_lowered_simd_width(const struct
> > gen_device_info *devinfo,
> > !inst->force_writemask_all) {
> >    const unsigned channels_per_grf = inst->exec_size /
> >   DIV_ROUND_UP(inst->size_written, REG_SIZE);
> > -  unsigned exec_type_size = 0;
> > -  for (int i = 0; i < inst->sources; i++) {
> > - if (inst->src[i].file != BAD_FILE)
> > -exec_type_size = MAX2(exec_type_size, type_sz(inst-
> > >src[i].type));
> > -  }
> > +  unsigned exec_type_size = inst->exec_data_size();
> >    assert(exec_type_size);
> > 
> >    /* The hardware shifts exactly 8 channels per compressed
> > half of the
> > diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h
> > b/src/mesa/drivers/dri/i965/brw_ir_fs.h
> > index cad3712..9875f2d 100644
> > --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
> > @@ -349,6 +349,7 @@ public:
> > bool can_change_types() const;
> > bool has_side_effects() const;
> > bool has_source_and_destination_hazard() const;
> > +   unsigned exec_data_size() const;
> 
> In the reworked patch in your new branch, this still exists even
> though the function is no longer a member.
> 

Right, I forgot to delete it.

Thanks!

Sam

signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/pipeline: Replace get_fs_input_map with get_last_vue_prog_data

2017-01-12 Thread Lionel Landwerlin

On 13/01/17 06:45, Lionel Landwerlin wrote:

On 13/01/17 01:08, Jason Ekstrand wrote:

This lets us delete a helper from genX_pipeline.c
---
  src/intel/vulkan/anv_pipeline.c  |  2 +-
  src/intel/vulkan/anv_private.h   | 10 +-
  src/intel/vulkan/genX_pipeline.c | 20 
  3 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c 
b/src/intel/vulkan/anv_pipeline.c

index 6c939b0..a86dd98 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -270,7 +270,7 @@ populate_wm_prog_key(const struct anv_pipeline 
*pipeline,
 /* TODO: we could set this to 0 based on the information in 
nir_shader, but

  * this function is called before spirv_to_nir. */
 const struct brw_vue_map *vue_map =
-  anv_pipeline_get_fs_input_map(pipeline);
+ &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
 key->input_slots_valid = vue_map->slots_valid;
   /* Vulkan doesn't specify a default */
diff --git a/src/intel/vulkan/anv_private.h 
b/src/intel/vulkan/anv_private.h

index dbc8c3c..ce2b591 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1482,15 +1482,15 @@ ANV_DECL_GET_PROG_DATA_FUNC(gs, 
MESA_SHADER_GEOMETRY)

  ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
  ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE)
  -static inline const struct brw_vue_map *
-anv_pipeline_get_fs_input_map(const struct anv_pipeline *pipeline)
+static inline const struct brw_vue_prog_data *
+anv_pipeline_get_last_vue_prog_data(const struct anv_pipeline 
*pipeline)

  {


I just realized this doesn't seem right, we should check from 
TESS_EVAL before GEOMETRY right?


Dammit no. This is right, sorry.

Reviewed-by: Lionel Landwerlin 




 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
-  return &get_gs_prog_data(pipeline)->base.vue_map;
+  return &get_gs_prog_data(pipeline)->base;
 else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
-  return &get_tes_prog_data(pipeline)->base.vue_map;
+  return &get_tes_prog_data(pipeline)->base;
 else
-  return &get_vs_prog_data(pipeline)->base.vue_map;
+  return &get_vs_prog_data(pipeline)->base;
  }
VkResult
diff --git a/src/intel/vulkan/genX_pipeline.c 
b/src/intel/vulkan/genX_pipeline.c

index b58aac4..7fa68c0 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -286,7 +286,6 @@ static void
  emit_3dstate_sbe(struct anv_pipeline *pipeline)
  {
 const struct brw_wm_prog_data *wm_prog_data = 
get_wm_prog_data(pipeline);

-   const struct brw_vue_map *fs_input_map;
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), sbe);
@@ -296,7 +295,8 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline)
return;
 }
  -   fs_input_map = anv_pipeline_get_fs_input_map(pipeline);
+   const struct brw_vue_map *fs_input_map =
+ &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
   struct GENX(3DSTATE_SBE) sbe = {
GENX(3DSTATE_SBE_header),
@@ -846,19 +846,6 @@ emit_cb_state(struct anv_pipeline *pipeline,
 }
  }
  -/**
- * Get the brw_vue_prog_data for the last stage which outputs VUEs.
- */
-static inline struct brw_vue_prog_data *
-get_last_vue_prog_data(struct anv_pipeline *pipeline)
-{
-   for (int s = MESA_SHADER_GEOMETRY; s >= 0; s--) {
-  if (pipeline->shaders[s])
- return (struct brw_vue_prog_data *) 
pipeline->shaders[s]->prog_data;

-   }
-   return NULL;
-}
-
  static void
  emit_3dstate_clip(struct anv_pipeline *pipeline,
const VkPipelineViewportStateCreateInfo *vp_info,
@@ -886,7 +873,8 @@ emit_3dstate_clip(struct anv_pipeline *pipeline,
clip.FrontWinding= 
vk_to_gen_front_face[rs_info->frontFace];
clip.CullMode= 
vk_to_gen_cullmode[rs_info->cullMode];

clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable;
-  const struct brw_vue_prog_data *last = 
get_last_vue_prog_data(pipeline);

+  const struct brw_vue_prog_data *last =
+ anv_pipeline_get_last_vue_prog_data(pipeline);
if (last) {
   clip.UserClipDistanceClipTestEnableBitmask = 
last->clip_distance_mask;
   clip.UserClipDistanceCullTestEnableBitmask = 
last->cull_distance_mask;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/pipeline: Replace get_fs_input_map with get_last_vue_prog_data

2017-01-12 Thread Lionel Landwerlin

On 13/01/17 01:08, Jason Ekstrand wrote:

This lets us delete a helper from genX_pipeline.c
---
  src/intel/vulkan/anv_pipeline.c  |  2 +-
  src/intel/vulkan/anv_private.h   | 10 +-
  src/intel/vulkan/genX_pipeline.c | 20 
  3 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 6c939b0..a86dd98 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -270,7 +270,7 @@ populate_wm_prog_key(const struct anv_pipeline *pipeline,
 /* TODO: we could set this to 0 based on the information in nir_shader, but
  * this function is called before spirv_to_nir. */
 const struct brw_vue_map *vue_map =
-  anv_pipeline_get_fs_input_map(pipeline);
+  &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
 key->input_slots_valid = vue_map->slots_valid;
  
 /* Vulkan doesn't specify a default */

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index dbc8c3c..ce2b591 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1482,15 +1482,15 @@ ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
  ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
  ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE)
  
-static inline const struct brw_vue_map *

-anv_pipeline_get_fs_input_map(const struct anv_pipeline *pipeline)
+static inline const struct brw_vue_prog_data *
+anv_pipeline_get_last_vue_prog_data(const struct anv_pipeline *pipeline)
  {


I just realized this doesn't seem right, we should check from TESS_EVAL 
before GEOMETRY right?



 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
-  return &get_gs_prog_data(pipeline)->base.vue_map;
+  return &get_gs_prog_data(pipeline)->base;
 else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
-  return &get_tes_prog_data(pipeline)->base.vue_map;
+  return &get_tes_prog_data(pipeline)->base;
 else
-  return &get_vs_prog_data(pipeline)->base.vue_map;
+  return &get_vs_prog_data(pipeline)->base;
  }
  
  VkResult

diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index b58aac4..7fa68c0 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -286,7 +286,6 @@ static void
  emit_3dstate_sbe(struct anv_pipeline *pipeline)
  {
 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
-   const struct brw_vue_map *fs_input_map;
  
 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {

anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), sbe);
@@ -296,7 +295,8 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline)
return;
 }
  
-   fs_input_map = anv_pipeline_get_fs_input_map(pipeline);

+   const struct brw_vue_map *fs_input_map =
+  &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
  
 struct GENX(3DSTATE_SBE) sbe = {

GENX(3DSTATE_SBE_header),
@@ -846,19 +846,6 @@ emit_cb_state(struct anv_pipeline *pipeline,
 }
  }
  
-/**

- * Get the brw_vue_prog_data for the last stage which outputs VUEs.
- */
-static inline struct brw_vue_prog_data *
-get_last_vue_prog_data(struct anv_pipeline *pipeline)
-{
-   for (int s = MESA_SHADER_GEOMETRY; s >= 0; s--) {
-  if (pipeline->shaders[s])
- return (struct brw_vue_prog_data *) pipeline->shaders[s]->prog_data;
-   }
-   return NULL;
-}
-
  static void
  emit_3dstate_clip(struct anv_pipeline *pipeline,
const VkPipelineViewportStateCreateInfo *vp_info,
@@ -886,7 +873,8 @@ emit_3dstate_clip(struct anv_pipeline *pipeline,
clip.FrontWinding= vk_to_gen_front_face[rs_info->frontFace];
clip.CullMode= vk_to_gen_cullmode[rs_info->cullMode];
clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable;
-  const struct brw_vue_prog_data *last = get_last_vue_prog_data(pipeline);
+  const struct brw_vue_prog_data *last =
+ anv_pipeline_get_last_vue_prog_data(pipeline);
if (last) {
   clip.UserClipDistanceClipTestEnableBitmask = 
last->clip_distance_mask;
   clip.UserClipDistanceCullTestEnableBitmask = 
last->cull_distance_mask;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [ANNOUNCE] mesa 12.0.5

2017-01-12 Thread Timo Aaltonen
On 13.01.2017 07:40, Emil Velikov wrote:
> On 12 January 2017 at 10:29, Timo Aaltonen  wrote:
>> On 11.01.2017 15:01, Emil Velikov wrote:
>>> On 6 December 2016 at 14:55, Marek Olšák  wrote:
>>>> Hi,
>>>>
>>>> I'd like to announce that this release doesn't fix the worst GPU
>>>> hangs/freezes it has. I'm talking about all Gallium drivers here.
>>>> There was a bug recently discovered in shared code that leads to
>>>> random GPU hangs with radeonsi, but all other Gallium drivers are also
>>>> affected in "some negative way", which may include freezes. The fix
>>>> was available prior to 12.0.5, but wasn't applied due to a process
>>>> issue.
>>>>
>>>> It is still the best 12.0.x release, but users and distributions
>>>> wanting better stability for non-Intel drivers should wait for 12.0.6.
>>>>
>>> As some of you may know, I've mentioned that 12.0.6 will be available
>>> if we get at least a few developers/teams behind it.
>>>
>>> Since then people have contacted me, on and off list, speaking
>>> positively about having 12.0.6. As such, I'll be rolling the
>>> release.
>>> Not to mention that Michel went the extra mile with improved/extra
>>> patches on the topic/issue mentioned by Marek.
>>>
>>> Thanks for the feedback, everyone!
>>> Emil
>>
>> Is there a public branch with the proposed commits? mesa/12.0 hasn't
>> been touched since previous release. I'd need the release or a
>> preliminary branch ASAP for ubuntu..
>>
> A preliminary one, tested only locally:
> https://github.com/evelikov/Mesa/commits/mesa_12/jenkins
> 
> A proper one, alongside a summary email will follow shortly - as the
> test results arrive.

Great, thanks! Any reason mesa/12.0 can't be kept in sync with this?


-- 
t
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97067] WebGL: conformance/glsl/misc/shaders-with-invariance.html Fail

2017-01-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97067

--- Comment #4 from Tapani Pälli  ---
This test is passing on i965 with following versions:

Google Chrome 55.0.2883.87
Mesa 17.0.0-devel (git-0252ba2)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: optimize shl + and

2017-01-12 Thread Ilia Mirkin
Address loading can often end up as shl + shr + shl combinations. The
latter two are equal shifts, which get converted into an and mask.
However if the previous shl is more than the mask is trying to remove
(in terms of low bits), we can just remove the and entirely. This
reduces some large shaders by as many as 3% of instructions (out of 2K).

total instructions in shared programs : 6495509 -> 6491076 (-0.07%)
total gprs used in shared programs: 954621 -> 954623 (0.00%)

            local     gpr    inst   bytes
helped          0       0    1014    1014
  hurt          0       2       0       0

Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 28b5985..04b6af2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1260,6 +1260,17 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
&imm0, int s)
  i->op = OP_EXTBF;
  i->setSrc(0, src->getSrc(0));
  i->setSrc(1, new_ImmediateValue(prog, ext));
+  } else if (src->op == OP_SHL &&
+ src->src(1).getImmediate(imm1) &&
+ i->src(t).mod == Modifier(0) &&
+ util_is_power_of_two(~imm0.reg.data.u32 + 1) &&
+ util_last_bit(~imm0.reg.data.u32) <= imm1.reg.data.u32) {
+ i->op = OP_MOV;
+ i->setSrc(s, NULL);
+ if (t) {
+i->setSrc(0, i->getSrc(t));
+i->setSrc(t, NULL);
+ }
   }
}
   break;
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [ANNOUNCE] mesa 12.0.5

2017-01-12 Thread Emil Velikov
On 12 January 2017 at 10:29, Timo Aaltonen  wrote:
> On 11.01.2017 15:01, Emil Velikov wrote:
>> On 6 December 2016 at 14:55, Marek Olšák  wrote:
>>> Hi,
>>>
>>> I'd like to announce that this release doesn't fix the worst GPU
>>> hangs/freezes it has. I'm talking about all Gallium drivers here.
>>> There was a bug recently discovered in shared code that leads to
>>> random GPU hangs with radeonsi, but all other Gallium drivers are also
>>> affected in "some negative way", which may include freezes. The fix
>>> was available prior to 12.0.5, but wasn't applied due to a process
>>> issue.
>>>
>>> It is still the best 12.0.x release, but users and distributions
>>> wanting better stability for non-Intel drivers should wait for 12.0.6.
>>>
>> As some of you may know, I've mentioned that 12.0.6 will be available
>> if we get at least a few developers/teams behind it.
>>
>> Since then people have contacted me, on and off list, speaking
>> positively about having 12.0.6. As such, I'll be rolling the
>> release.
>> Not to mention that Michel went the extra mile with improved/extra
>> patches on the topic/issue mentioned by Marek.
>>
>> Thanks for the feedback, everyone!
>> Emil
>
> Is there a public branch with the proposed commits? mesa/12.0 hasn't
> been touched since previous release. I'd need the release or a
> preliminary branch ASAP for ubuntu..
>
A preliminary one, tested only locally:
https://github.com/evelikov/Mesa/commits/mesa_12/jenkins

A proper one, alongside a summary email will follow shortly - as the
test results arrive.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 21/21] anv: Avoid some resolves for samplable HiZ buffers

2017-01-12 Thread Nanley Chery
On Thu, Jan 12, 2017 at 06:26:44PM -0800, Jason Ekstrand wrote:
> Good work!  Series is
> 
> Reviewed-by: Jason Ekstrand 
> 

Thanks!

-Nanley

> On Thu, Jan 12, 2017 at 5:34 PM, Nanley Chery  wrote:
> 
> > v2: Simplify nested ifs (Jason Ekstrand)
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/intel/vulkan/genX_cmd_buffer.c | 67 --
> > 
> >  1 file changed, 49 insertions(+), 18 deletions(-)
> >
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> > b/src/intel/vulkan/genX_cmd_buffer.c
> > index 59041f0458..f7894a0574 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -311,11 +311,21 @@ need_input_attachment_state(const struct
> > anv_render_pass_attachment *att)
> >  }
> >
> >  static enum isl_aux_usage
> > -layout_to_hiz_usage(VkImageLayout layout)
> > +layout_to_hiz_usage(VkImageLayout layout, uint8_t samples)
> >  {
> > switch (layout) {
> > case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
> >return ISL_AUX_USAGE_HIZ;
> > +   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
> > +   case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
> > +  if (anv_can_sample_with_hiz(GEN_GEN, samples))
> > + return ISL_AUX_USAGE_HIZ;
> > +  /* Fall-through */
> > +   case VK_IMAGE_LAYOUT_GENERAL:
> > +  /* This buffer could be used as a source or destination in a
> > transfer
> > +   * operation. Transfer operations current don't perform HiZ-enabled
> > reads
> > +   * and writes.
> > +   */
> > default:
> >return ISL_AUX_USAGE_NONE;
> > }
> > @@ -333,28 +343,48 @@ transition_depth_buffer(struct anv_cmd_buffer
> > *cmd_buffer,
> >  {
> > assert(image);
> >
> > -   if (image->aux_usage != ISL_AUX_USAGE_HIZ)
> > +   if (image->aux_usage != ISL_AUX_USAGE_HIZ || final_layout ==
> > initial_layout)
> >return;
> >
> > -   const bool hiz_enabled = layout_to_hiz_usage(initial_layout) ==
> > +   const bool hiz_enabled = layout_to_hiz_usage(initial_layout,
> > image->samples) ==
> >  ISL_AUX_USAGE_HIZ;
> > -   const bool enable_hiz = layout_to_hiz_usage(final_layout) ==
> > +   const bool enable_hiz = layout_to_hiz_usage(final_layout,
> > image->samples) ==
> > ISL_AUX_USAGE_HIZ;
> >
> > -   /* We've already initialized the aux HiZ buffer at BindImageMemory
> > time,
> > -* so there's no need to perform a HIZ resolve or clear to avoid GPU
> > hangs.
> > -* This initial layout indicates that the user doesn't care about the
> > data
> > -* that's currently in the buffer, so no resolves are necessary.
> > -*/
> > -   if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED)
> > -  return;
> > -
> > -   if (hiz_enabled == enable_hiz) {
> > -  /* The same buffer will be used, no resolves are necessary */
> > +   enum blorp_hiz_op hiz_op;
> > +   if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
> > +  /* We've already initialized the aux HiZ buffer at BindImageMemory
> > time,
> > +   * so there's no need to perform a HIZ resolve or clear to avoid
> > GPU hangs.
> > +   * This initial layout indicates that the user doesn't care about
> > the data
> > +   * that's currently in the buffer, so resolves are not necessary
> > except
> > +   * for the special case noted below.
> > +   */
> > +  hiz_op = BLORP_HIZ_OP_NONE;
> > } else if (hiz_enabled && !enable_hiz) {
> > -  anv_gen8_hiz_op_resolve(cmd_buffer, image,
> > BLORP_HIZ_OP_DEPTH_RESOLVE);
> > +  hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
> > +   } else if (!hiz_enabled && enable_hiz) {
> > +  hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
> > } else {
> > -  assert(!hiz_enabled && enable_hiz);
> > +  assert(hiz_enabled == enable_hiz);
> > +  /* If the same buffer will be used, no resolves are necessary
> > except for
> > +   * the special case noted below.
> > +   */
> > +  hiz_op = BLORP_HIZ_OP_NONE;
> > +   }
> > +
> > +   if (hiz_op != BLORP_HIZ_OP_NONE)
> > +  anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
> > +
> > +   /* Images that have sampling with HiZ enabled cause all shader
> > sampling to
> > +* load data with the HiZ buffer. Therefore, in the case of
> > transitioning to
> > +* the general layout - which currently routes all writes to the depth
> > +* buffer - we must ensure that the HiZ buffer remains consistent with
> > the
> > +* depth buffer by performing an additional HIZ resolve if the
> > operation
> > +* required by this transition was not already a HiZ resolve.
> > +*/
> > +   if (final_layout == VK_IMAGE_LAYOUT_GENERAL &&
> > +   anv_can_sample_with_hiz(GEN_GEN, image->samples) &&
> > +   hiz_op != BLORP_HIZ_OP_HIZ_RESOLVE) {
> >anv_gen8_hiz_op_resolve(cmd_buffer, image,
> > BLORP_HIZ_OP_HIZ_RESOLVE);
> > }
> >  }
> > @@ -512,7 +542,7 

[Mesa-dev] [Bug 98604] [VDPAU, DRI3] Fullscreen flash video fails when hardware acceleration is enabled.

2017-01-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98604

--- Comment #12 from Dieter Nützel  ---
(In reply to Michel Dänzer from comment #11)
> Can you get a backtrace of a crash with gdb?

Hello Michel,

No backtrace of a crash, but I could attach gdb to the running konqueror process.
The BBC weather site needs Java.
Hope this helps, too.

/opt/mesa> konqueror &
[2] 10181
/opt/mesa> Vector smash protection is enabled.
openjdk version "1.8.0_111"
OpenJDK Runtime Environment (IcedTea 3.2.0) (suse-33.1-x86_64)
OpenJDK 64-Bit Server VM (build 25.111-b14, mixed mode)
konqueror(10181)/kio (KIOJob) KIO::TransferJob::slotData: mimeType() not
emitted when sending first data!; job URL =
KUrl("http://s.update.rubiconproject.com/2/873648/analytics.js?si=52926=www.bbc.com==6cada3a9-2f5f-48b1-861f-31fc282cc43f=8736481428691810142000;)
data size = 0 
konqueror(10181)/kio (KIOJob) KIO::TransferJob::slotData: mimeType() not
emitted when sending first data!; job URL =
KUrl("http://ps.eyeota.net/match/bounce/?bid=i0r4o4v=LAuIY6Hs;) data size =
0

(gdb) bt full
#0  0x7faaa40fe2bd in poll () at ../sysdeps/unix/syscall-template.S:81
#1  0x7faa9f4a5402 in _xcb_conn_wait () at /usr/lib64/libxcb.so.1
#2  0x7faa9f4a7209 in xcb_wait_for_special_event () at
/usr/lib64/libxcb.so.1
#3  0x7fa9e961e4a6 in dri3_wait_present_events (scrn=scrn@entry=0x2e2f090)
at vl/vl_winsys_dri3.c:183
ev = 
#4  0x7fa9e961f3bc in vl_dri3_screen_texture_from_drawable
(vscreen=0x2e2f090, drawable=) at vl/vl_winsys_dri3.c:558
scrn = 0x2e2f090
buffer = 
#5  0x7fa9e961b119 in vlVdpPresentationQueueDisplay
(presentation_queue=, surface=5, clip_width=0, clip_height=0,
earliest_presentation_time=0) at presentation.c:234
dump_window = 0
pq = 0x2f0ab60
surf = 0x4198340
pipe = 0x2ef3fe0
tex = 
surf_templ =
  {reference = {count = 0}, texture = 0x4, context =
0x1, format = PIPE_FORMAT_NONE, width = 0, height = 0, writable = 0, u
= {tex = {level = 3, first_layer = 0, last_layer = 0}, buf = {first_element =
3, last_element = 0}}}
surf_draw = 
src_rect = {x0 = 1, x1 = 16809983, y0 = 68781024, y1 = 0}
dst_clip = {x0 = 68780956, x1 = 0, y0 = 68780992, y1 = 0}
dirty_area = 
compositor = 0x2e2ef48
cstate = 0x2f0ab70
vscreen = 0x2e2f090
#6  0x7faa2efc37f3 in  () at /usr/lib64/browser-plugins/libflashplayer.so
#7  0x7faa2ed522e9 in  () at /usr/lib64/browser-plugins/libflashplayer.so
#8  0x7faa2ed524ed in  () at /usr/lib64/browser-plugins/libflashplayer.so
#9  0x7faa2ed4fc5c in  () at /usr/lib64/browser-plugins/libflashplayer.so
#10 0x7faa2eca69a9 in  () at /usr/lib64/browser-plugins/libflashplayer.so
#11 0x7faa2ec2c390 in  () at /usr/lib64/browser-plugins/libflashplayer.so
#12 0x7faa2f1c472f in  () at /usr/lib64/browser-plugins/libflashplayer.so
#13 0x7faa2f1c57ec in  () at /usr/lib64/browser-plugins/libflashplayer.so
#14 0x7faa2f12b191 in  () at /usr/lib64/browser-plugins/libflashplayer.so
#15 0x15aebcff63f6 in  ()
#16 0x0dd1873ddb78 in  ()
#17 0x0dd1873ddae0 in  ()
#18 0x1be706c23128 in  ()
#19 0x1be706c23061 in  ()
#20 0x in  ()

(gdb) info registers all
rax0xfdfc   -516
rbx0x2f00bb049286064
rcx0x7faaa40fe2bd   140370873672381
rdx0x   4294967295
rsi0x1  1
rdi0x7fffb0fac080   140736162611328
rbp0x29b7e600x29b7e60
rsp0x7fffb0fac070   0x7fffb0fac070
r8 0x0  0
r9 0x94 148
r100x0  0
r110x293659
r120x29b7e7843744888
r130x0  0
r140x0  0
r150x7fffb0fac080   140736162611328
rip0x7faaa40fe2bd   0x7faaa40fe2bd 
eflags 0x293[ CF AF SF IF ]
cs 0x33 51
ss 0x2b 43
ds 0x0  0
es 0x0  0
fs 0x0  0
gs 0x0  0
st0-nan(0x0fe2b2b2b)(raw 0xfe2b2b2b)
st1-nan(0x2c002b002b002b)   (raw 0x002c002b002b002b)
st2-nan(0xd300d300d300d30)  (raw 0x0d300d300d300d30)
st3-inf (raw 0x)
st4-nan(0x1ff00ff00ff0) (raw 0x1ff00ff00ff0)
st5-inf (raw 0x)
st60(raw 0x)
st70(raw 0x)
fctrl  0x37f895
fstat  0x20 32
ftag   0x   65535
fiseg  0x7faa   32682
fioff  0x29bf75ed   700413421
foseg  0x7fff   32767
fooff  0xb0fabb38   -1325745352
fop0x55c1372
xmm0   {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x0 }, v8_int16 = 

Re: [Mesa-dev] [PATCH v2 03/21] anv: Replace anv_image_has_hiz() with ISL_AUX_USAGE_HIZ

2017-01-12 Thread Nanley Chery
On Thu, Jan 12, 2017 at 06:17:58PM -0800, Jason Ekstrand wrote:
> On Thu, Jan 12, 2017 at 5:33 PM, Nanley Chery  wrote:
> 
> > The helper doesn't provide additional functionality over the current
> > infrastructure.
> >
> > v2: Add comment to anv_image::aux_usage (Jason Ekstrand)
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/intel/vulkan/anv_blorp.c   |  2 +-
> >  src/intel/vulkan/anv_image.c   | 10 --
> >  src/intel/vulkan/anv_private.h | 15 ---
> >  src/intel/vulkan/gen8_cmd_buffer.c |  2 +-
> >  src/intel/vulkan/genX_cmd_buffer.c |  2 +-
> >  5 files changed, 15 insertions(+), 16 deletions(-)
> >
> > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> > index 7e157b4d26..97ce236fc5 100644
> > --- a/src/intel/vulkan/anv_blorp.c
> > +++ b/src/intel/vulkan/anv_blorp.c
> > @@ -858,7 +858,7 @@ void anv_CmdClearDepthStencilImage(
> > struct blorp_surf depth, stencil;
> > if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
> >get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
> > -   image->aux_usage, );
> > +   ISL_AUX_USAGE_NONE, );
> > } else {
> >memset(, 0, sizeof(depth));
> > }
> > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> > index f262d8a524..d821629191 100644
> > --- a/src/intel/vulkan/anv_image.c
> > +++ b/src/intel/vulkan/anv_image.c
> > @@ -195,6 +195,7 @@ make_surface(const struct anv_device *dev,
> >   isl_surf_get_hiz_surf(>isl_dev, >depth_surface.isl,
> > >aux_surface.isl);
> >   add_surface(image, >aux_surface);
> > + image->aux_usage = ISL_AUX_USAGE_HIZ;
> >}
> > } else if (aspect == VK_IMAGE_ASPECT_COLOR_BIT && vk_info->samples ==
> > 1) {
> >if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC)) {
> > @@ -523,6 +524,11 @@ anv_CreateImageView(VkDevice _device,
> >iview->isl.usage = 0;
> > }
> >
> > +   /* Sampling from HiZ is not yet enabled */
> > +   enum isl_aux_usage surf_usage = image->aux_usage;
> > +   if (surf_usage == ISL_AUX_USAGE_HIZ)
> > +  surf_usage = ISL_AUX_USAGE_NONE;
> > +
> > /* Input attachment surfaces for color or depth are allocated and
> > filled
> >  * out at BeginRenderPass time because they need compression
> > information.
> >  * Stencil image do not support compression so we just use the texture
> > @@ -540,7 +546,7 @@ anv_CreateImageView(VkDevice _device,
> >.surf = >isl,
> >.view = ,
> >.aux_surf = >aux_surface.isl,
> > -  .aux_usage = image->aux_usage,
> > +  .aux_usage = surf_usage,
> >.mocs = device->default_mocs);
> >
> >if (!device->info.has_llc)
> > @@ -564,7 +570,7 @@ anv_CreateImageView(VkDevice _device,
> >   .surf = >isl,
> >   .view = ,
> >   .aux_surf = >aux_surface.isl,
> > - .aux_usage = image->aux_usage,
> > + .aux_usage = surf_usage,
> >   .mocs = device->default_mocs);
> >} else {
> >   anv_fill_buffer_surface_state(device,
> > iview->storage_surface_state,
> > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> > private.h
> > index dbc8c3cf68..26f2986005 100644
> > --- a/src/intel/vulkan/anv_private.h
> > +++ b/src/intel/vulkan/anv_private.h
> > @@ -1581,7 +1581,10 @@ struct anv_image {
> >};
> > };
> >
> > -   /** The aux usage for this surface when outside a render pass */
> > +   /**
> > +* The aux usage for this surface when outside a render pass
> > +* (and inside a render pass for HiZ-enabled depth stencil
> > attachments).
> > +*/
> >
> 
> How about:
> 
> For color images, this is the aux usage for this image when not used as a
> color attachment.
> 
> For depth/stencil images, this is set to ISL_AUX_USAGE_HIZ if the image has
> a HiZ buffer.
> 
> 

Sure. It's much clearer than what I had.

> > enum isl_aux_usage aux_usage;
> >
> > struct anv_surface aux_surface;
> > @@ -1642,16 +1645,6 @@ const struct anv_surface *
> >  anv_image_get_surface_for_aspect_mask(const struct anv_image *image,
> >VkImageAspectFlags aspect_mask);
> >
> > -static inline bool
> > -anv_image_has_hiz(const struct anv_image *image)
> > -{
> > -   /* We must check the aspect because anv_image::aux_surface may be used
> > for
> > -* any type of auxiliary surface, not just HiZ.
> > -*/
> > -   return (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
> > -  image->aux_surface.isl.size > 0;
> > -}
> > -
> >  struct anv_buffer_view {
> > enum isl_format format; /**< 

[Mesa-dev] [PATCH] travis: Add the new drivers etnaviv and imx

2017-01-12 Thread Rhys Kidd
Signed-off-by: Rhys Kidd 
---
 .travis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 3d107aa..dfbc052 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -32,7 +32,7 @@ env:
 - DRI3PROTO_VERSION=dri3proto-1.0
 - PRESENTPROTO_VERSION=presentproto-1.0
 - LIBPCIACCESS_VERSION=libpciaccess-0.13.4
-- LIBDRM_VERSION=libdrm-2.4.65
+- LIBDRM_VERSION=libdrm-2.4.74
 - XCBPROTO_VERSION=xcb-proto-1.11
 - LIBXCB_VERSION=libxcb-1.11
 - LIBXSHMFENCE_VERSION=libxshmfence-1.2
@@ -92,7 +92,7 @@ install:
 
   - wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
   - tar -jxvf $LIBDRM_VERSION.tar.bz2
-  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 && 
make install)
+  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 
--enable-etnaviv-experimental-api && make install)
 
   - wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
   - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
@@ -103,7 +103,7 @@ script:
   ./autogen.sh --enable-debug
 --with-egl-platforms=x11,drm
 --with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
---with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600
+--with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600,etnaviv,imx
 --disable-llvm-shared-libs
 ;
   make && make check;
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] egl: correct surface_type when add config

2017-01-12 Thread Tomasz Figa
On Fri, Jan 13, 2017 at 1:18 AM, Emil Velikov  wrote:
> [Adding Tomasz]
>
> On 12 January 2017 at 07:14, Liu Zhiquan  wrote:
>> When adding a config, dri_config is either double or single. We should only
>> add EGL_WINDOW_BIT to surface_type for a double dri_config, and only add
>> EGL_PBUFFER_BIT and EGL_PIXMAP_BIT to surface_type for a single dri_config.
>> This avoids a crash when operating on the wrong surface_type, for which
>> dri_double_config or dri_single_config is null.
>>
> Afaict you got it spot on - WINDOW are double, while PBUFFER and
> PIXMAP are single config.

I think this is not entirely true, at least according to EGL 1.4 spec [1].

PBUFFER and PIXMAP surfaces seem to be single buffered indeed,
although the wording used in section 2.2.2 is a bit fuzzy. It states
that WINDOW and PBUFFER surfaces are "back" buffered, while PIXMAP are
"single" buffered. This seems to be clarified reasonably enough by
section 3.9.1 that uses the "back-buffered" phrase only for WINDOW
surfaces.

However for WINDOW surfaces it seems to be a bit more complicated.
Section 3.9.1 mentions both "back-buffered" and "single-buffered"
modes for WINDOW surfaces. Moreover, in section 3.5.1, where
eglCreateWindowSurface is described, there is a mention of the
EGL_RENDER_BUFFER attribute that can be set to either EGL_BACK_BUFFER
or EGL_SINGLE_BUFFER. It also says that "some window systems may not
allow rendering directly to the front buffer of a window surface" and
that for such systems "when such windows are made current to a
context, the context will always have an EGL_RENDER_BUFFER attribute
value of EGL_BACK_BUFFER". So indeed, if none of our EGL platforms
(especially X11) relies on availability of single buffering for window
surfaces, we might just use that exception and always report
EGL_BACK_BUFFER for window surfaces (note that we don't do it
currently and we report what was given at surface creation time).

[1] https://www.khronos.org/registry/EGL/specs/eglspec.1.4.pdf

Best regards,
Tomasz
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Ilia Mirkin
On Thu, Jan 12, 2017 at 9:13 PM, Jason Ekstrand  wrote:
> Unless, of course, it's controlled by the same hardware bit... Clearly, we
> can give you abs on rsq without denorm flushing (easy shader hacks) but
> not the other way around.

OK, so somehow I missed that earlier. However there's an interesting
section in the PRM:

https://01.org/sites/default/files/documentation/intel-gfx-prm-osrc-skl-vol07-3d_media_gpgpu.pdf

on PDF page 854, "Dismissed Legacy Behaviors" which has a list of
suggested IEEE 754 deviations for DX9. One of them is indeed that 0 *
x = 0, but another is that input NaNs be propagated with certain
exceptions. Also they suggest that RCP(0)/RSQ(0) = fmax. Interesting.

So at this point, the zero_wins thing is pretty much blown. i965
appears to have an all-or-nothing approach, and additionally that
approach doesn't match up exactly to what NVIDIA does (or at least I'm
not aware of a clamp-everything mode).

This will take some thought to figure out how something can be
specified so that a single spec works for both i965 and nv/amd. OTOH
we could have two different specs that just expose different things -
e.g. i965 could expose a MESA_shader_float_alt_mode or whatever which
is spec'd to do the things that the PRM says, and nv/amd have the
MESA_shader_float_zero_wins ext which does what we were talking about
earlier.

I'm open to other suggestions too.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 21/21] anv: Avoid some resolves for samplable HiZ buffers

2017-01-12 Thread Jason Ekstrand
Good work!  Series is

Reviewed-by: Jason Ekstrand 

On Thu, Jan 12, 2017 at 5:34 PM, Nanley Chery  wrote:

> v2: Simplify nested ifs (Jason Ekstrand)
>
> Signed-off-by: Nanley Chery 
> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 67 --
> 
>  1 file changed, 49 insertions(+), 18 deletions(-)
>
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 59041f0458..f7894a0574 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -311,11 +311,21 @@ need_input_attachment_state(const struct
> anv_render_pass_attachment *att)
>  }
>
>  static enum isl_aux_usage
> -layout_to_hiz_usage(VkImageLayout layout)
> +layout_to_hiz_usage(VkImageLayout layout, uint8_t samples)
>  {
> switch (layout) {
> case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
>return ISL_AUX_USAGE_HIZ;
> +   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
> +   case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
> +  if (anv_can_sample_with_hiz(GEN_GEN, samples))
> + return ISL_AUX_USAGE_HIZ;
> +  /* Fall-through */
> +   case VK_IMAGE_LAYOUT_GENERAL:
> +  /* This buffer could be used as a source or destination in a
> transfer
> +   * operation. Transfer operations current don't perform HiZ-enabled
> reads
> +   * and writes.
> +   */
> default:
>return ISL_AUX_USAGE_NONE;
> }
> @@ -333,28 +343,48 @@ transition_depth_buffer(struct anv_cmd_buffer
> *cmd_buffer,
>  {
> assert(image);
>
> -   if (image->aux_usage != ISL_AUX_USAGE_HIZ)
> +   if (image->aux_usage != ISL_AUX_USAGE_HIZ || final_layout ==
> initial_layout)
>return;
>
> -   const bool hiz_enabled = layout_to_hiz_usage(initial_layout) ==
> +   const bool hiz_enabled = layout_to_hiz_usage(initial_layout,
> image->samples) ==
>  ISL_AUX_USAGE_HIZ;
> -   const bool enable_hiz = layout_to_hiz_usage(final_layout) ==
> +   const bool enable_hiz = layout_to_hiz_usage(final_layout,
> image->samples) ==
> ISL_AUX_USAGE_HIZ;
>
> -   /* We've already initialized the aux HiZ buffer at BindImageMemory
> time,
> -* so there's no need to perform a HIZ resolve or clear to avoid GPU
> hangs.
> -* This initial layout indicates that the user doesn't care about the
> data
> -* that's currently in the buffer, so no resolves are necessary.
> -*/
> -   if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED)
> -  return;
> -
> -   if (hiz_enabled == enable_hiz) {
> -  /* The same buffer will be used, no resolves are necessary */
> +   enum blorp_hiz_op hiz_op;
> +   if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
> +  /* We've already initialized the aux HiZ buffer at BindImageMemory
> time,
> +   * so there's no need to perform a HIZ resolve or clear to avoid
> GPU hangs.
> +   * This initial layout indicates that the user doesn't care about
> the data
> +   * that's currently in the buffer, so resolves are not necessary
> except
> +   * for the special case noted below.
> +   */
> +  hiz_op = BLORP_HIZ_OP_NONE;
> } else if (hiz_enabled && !enable_hiz) {
> -  anv_gen8_hiz_op_resolve(cmd_buffer, image,
> BLORP_HIZ_OP_DEPTH_RESOLVE);
> +  hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
> +   } else if (!hiz_enabled && enable_hiz) {
> +  hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
> } else {
> -  assert(!hiz_enabled && enable_hiz);
> +  assert(hiz_enabled == enable_hiz);
> +  /* If the same buffer will be used, no resolves are necessary
> except for
> +   * the special case noted below.
> +   */
> +  hiz_op = BLORP_HIZ_OP_NONE;
> +   }
> +
> +   if (hiz_op != BLORP_HIZ_OP_NONE)
> +  anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
> +
> +   /* Images that have sampling with HiZ enabled cause all shader
> sampling to
> +* load data with the HiZ buffer. Therefore, in the case of
> transitioning to
> +* the general layout - which currently routes all writes to the depth
> +* buffer - we must ensure that the HiZ buffer remains consistent with
> the
> +* depth buffer by performing an additional HIZ resolve if the
> operation
> +* required by this transition was not already a HiZ resolve.
> +*/
> +   if (final_layout == VK_IMAGE_LAYOUT_GENERAL &&
> +   anv_can_sample_with_hiz(GEN_GEN, image->samples) &&
> +   hiz_op != BLORP_HIZ_OP_HIZ_RESOLVE) {
>anv_gen8_hiz_op_resolve(cmd_buffer, image,
> BLORP_HIZ_OP_HIZ_RESOLVE);
> }
>  }
> @@ -512,7 +542,7 @@ genX(cmd_buffer_setup_attachments)(struct
> anv_cmd_buffer *cmd_buffer,
>   } else {
>  if (iview->image->aux_usage == ISL_AUX_USAGE_HIZ) {
> state->attachments[i].aux_usage =
> -  layout_to_hiz_usage(att->initial_layout);
> +  layout_to_hiz_usage(att->initial_layout,

Re: [Mesa-dev] [PATCH v2 03/21] anv: Replace anv_image_has_hiz() with ISL_AUX_USAGE_HIZ

2017-01-12 Thread Jason Ekstrand
On Thu, Jan 12, 2017 at 5:33 PM, Nanley Chery  wrote:

> The helper doesn't provide additional functionality over the current
> infrastructure.
>
> v2: Add comment to anv_image::aux_usage (Jason Ekstrand)
>
> Signed-off-by: Nanley Chery 
> ---
>  src/intel/vulkan/anv_blorp.c   |  2 +-
>  src/intel/vulkan/anv_image.c   | 10 --
>  src/intel/vulkan/anv_private.h | 15 ---
>  src/intel/vulkan/gen8_cmd_buffer.c |  2 +-
>  src/intel/vulkan/genX_cmd_buffer.c |  2 +-
>  5 files changed, 15 insertions(+), 16 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 7e157b4d26..97ce236fc5 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -858,7 +858,7 @@ void anv_CmdClearDepthStencilImage(
> struct blorp_surf depth, stencil;
> if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
>get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
> -   image->aux_usage, );
> +   ISL_AUX_USAGE_NONE, );
> } else {
>memset(, 0, sizeof(depth));
> }
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index f262d8a524..d821629191 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -195,6 +195,7 @@ make_surface(const struct anv_device *dev,
>   isl_surf_get_hiz_surf(>isl_dev, >depth_surface.isl,
> >aux_surface.isl);
>   add_surface(image, >aux_surface);
> + image->aux_usage = ISL_AUX_USAGE_HIZ;
>}
> } else if (aspect == VK_IMAGE_ASPECT_COLOR_BIT && vk_info->samples ==
> 1) {
>if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC)) {
> @@ -523,6 +524,11 @@ anv_CreateImageView(VkDevice _device,
>iview->isl.usage = 0;
> }
>
> +   /* Sampling from HiZ is not yet enabled */
> +   enum isl_aux_usage surf_usage = image->aux_usage;
> +   if (surf_usage == ISL_AUX_USAGE_HIZ)
> +  surf_usage = ISL_AUX_USAGE_NONE;
> +
> /* Input attachment surfaces for color or depth are allocated and
> filled
>  * out at BeginRenderPass time because they need compression
> information.
>  * Stencil image do not support compression so we just use the texture
> @@ -540,7 +546,7 @@ anv_CreateImageView(VkDevice _device,
>.surf = >isl,
>.view = ,
>.aux_surf = >aux_surface.isl,
> -  .aux_usage = image->aux_usage,
> +  .aux_usage = surf_usage,
>.mocs = device->default_mocs);
>
>if (!device->info.has_llc)
> @@ -564,7 +570,7 @@ anv_CreateImageView(VkDevice _device,
>   .surf = >isl,
>   .view = ,
>   .aux_surf = >aux_surface.isl,
> - .aux_usage = image->aux_usage,
> + .aux_usage = surf_usage,
>   .mocs = device->default_mocs);
>} else {
>   anv_fill_buffer_surface_state(device,
> iview->storage_surface_state,
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> index dbc8c3cf68..26f2986005 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1581,7 +1581,10 @@ struct anv_image {
>};
> };
>
> -   /** The aux usage for this surface when outside a render pass */
> +   /**
> +* The aux usage for this surface when outside a render pass
> +* (and inside a render pass for HiZ-enabled depth stencil
> attachments).
> +*/
>

How about:

For color images, this is the aux usage for this image when not used as a
color attachment.

For depth/stencil images, this is set to ISL_AUX_USAGE_HIZ if the image has
a HiZ buffer.


> enum isl_aux_usage aux_usage;
>
> struct anv_surface aux_surface;
> @@ -1642,16 +1645,6 @@ const struct anv_surface *
>  anv_image_get_surface_for_aspect_mask(const struct anv_image *image,
>VkImageAspectFlags aspect_mask);
>
> -static inline bool
> -anv_image_has_hiz(const struct anv_image *image)
> -{
> -   /* We must check the aspect because anv_image::aux_surface may be used
> for
> -* any type of auxiliary surface, not just HiZ.
> -*/
> -   return (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
> -  image->aux_surface.isl.size > 0;
> -}
> -
>  struct anv_buffer_view {
> enum isl_format format; /**< VkBufferViewCreateInfo::format */
> struct anv_bo *bo;
> diff --git a/src/intel/vulkan/gen8_cmd_buffer.c
> b/src/intel/vulkan/gen8_cmd_buffer.c
> index 3e4aa9bc62..892a035304 100644
> --- a/src/intel/vulkan/gen8_cmd_buffer.c
> +++ b/src/intel/vulkan/gen8_cmd_buffer.c
> @@ -337,7 +337,7 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer
> *cmd_buffer,
> 

Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Jason Ekstrand
On Jan 12, 2017 4:56 PM, "Ilia Mirkin"  wrote:

On Thu, Jan 12, 2017 at 7:46 PM, Matt Turner  wrote:
> On Thu, Jan 12, 2017 at 3:20 PM, Ilia Mirkin  wrote:
>> On Thu, Jan 12, 2017 at 6:04 PM, Nicolai Hähnle 
wrote:
>>> On 12.01.2017 23:46, Ilia Mirkin wrote:

 On Thu, Jan 12, 2017 at 4:03 PM, Matteo Bruni 
 wrote:
>
> So, what would be really nice to have is a GLSL extension for some
> kind of switch to select the requested behavior WRT NaN. For example a
> three-way option with "don't generate NaN in arithmetic operations",
> "do generate NaN" and "don't care". It could also be a GL state if
> that's easier to implement with the existing hardware, since an
> individual application isn't supposed to require different behavior
> from one shader to the next.
>
> Is anyone interested in / favorable to something like this? It would
> solve the issue with defining NaN behavior in GLSL while making things
> a bit more compatible with "other API a lot of games are ported from
> which happens to be supported by all the desktop GPUs".


 Not that I'm biased, but on the NVIDIA Tesla series (G80-GT21x), this
 enable is handled via a global flag, not in the shader binary, so this
 is all-or-nothing for a whole pipeline. On GF100+, I believe there is
 also an enable via a global flag, but there is also an FMUL.FMZ (and
 FFMA.FMZ) flag, which I *think* has the same effect. So for GF100+ hw,
 this could be done at the instruction level.
>>>
>>>
>>> Well, I would also have advocated for what is effectively a
>>> per-program/pipeline flag anyway, even though GCN hardware can
theoretically
>>> do it per-instruction. Tracking a per-instruction bit in the compiler
>>> quickly becomes fragile (e.g. there's no good way for us to model this
>>> information per-instruction in LLVM IR). Per-shader isn't any better
than
>>> per-instruction due to linking, and per-shader-stage is awkward if we
ever
>>> want to do fancier cross-stage optimizations.
>>>
>>> It's really quite simple. Introduce an extension with a name like
>>> MESA_shader_float_dx9. The behavior I'd suggest is:
>>>
>>> Enabling/requiring the extension in a shader causes various semantics
>>> changes to bring floating point behavior in line with DX9 in that
shader's
>>> code:
>>>
>>> - 0*x = 0
>>
>> Yes. But only for fp32, not for fp64.
>>
>>> - sqrt/rsqrt are guaranteed to take the absolute value of their argument
>>
>> Is that necessary? If the software knows about the ext, it also knows
>> to stick the abs() in.
>
> Is there a compelling reason to make the extension offer just one of
> these many behavior differences?
>
> FWIW, i965 has IEEE and "ALT" floating-point modes. ALT, I think
> corresponds to d3d9 behavior, and its description says
>
> A floating-point execution mode that maps +/- inf to +/- fmax, +/-
> denorm to +/-0, and NaN to +0 at the FPU inputs and never produces
> infinities, denormals, or NaN values as outputs.

Interesting. I believe on NVIDIA hardware, it's just float multiply
that's affected.

>
> Also: Extended mathematics functions of log(), rsq() and sqrt() take
> the absolute value of the sources before computation to avoid
> generating INF and NaN results.
>
> If those two behaviors correspond to d3d9 behavior, I wouldn't want an
> extension that offered only the "zero wins" behavior and expected
> applications to insert abs().

Really? That creates ARB_gpu_shader5-style extensions which do 75
different things and that you can't expose if you can only do 74 of
them.


I understand your concern but what hardware are we planning to expose this
on that can't do d3d9?  It seems like there are three things here:  inf/NaN
handling, denorm flushing, and abs() on special functions.  I suppose I'd
be OK with separating things out a bit but we need to have a single enable
or else our hardware is going to have serious problems with it.

I think in the past we've avoided things like having "d3d9 mode"
in gallium API's - it's nice for these things to be individually
enumerated. I like the direction that e.g. ARB_clip_control went in -
make it all configurable individually instead of bundling unrelated
things together. This has allowed e.g. dolphin to do things in OpenGL
that are impossible on DX. And whether 0 * x = 0 or not seems rather
unrelated from whether rsq takes abs of its args.


Unless, of course, it's controlled by the same hardware bit... Clearly, we
can give you abs on rsq without denorm flushing (easy shader hacks) but not
the other way around.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/22] anv: Enable sampling from HiZ

2017-01-12 Thread Chery, Nanley G
Thanks for noticing this. I actually had a patch to handle this that must have 
gotten lost in my rebasing efforts. I'll send it in the v2.

My email isn't working properly, so sorry for any formatting errors.

From: Jason Ekstrand [ja...@jlekstrand.net]
Sent: Thursday, January 12, 2017 10:38 AM
To: Nanley Chery
Cc: mesa-dev@lists.freedesktop.org; Chery, Nanley G
Subject: Re: [Mesa-dev] [PATCH 21/22] anv: Enable sampling from HiZ

On Wed, Jan 11, 2017 at 5:55 PM, Nanley Chery 
> wrote:
Signed-off-by: Nanley Chery 
>
---
 src/intel/vulkan/TODO|  1 -
 src/intel/vulkan/anv_image.c | 19 ---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO
index 37fd16b437..38acc0dd5b 100644
--- a/src/intel/vulkan/TODO
+++ b/src/intel/vulkan/TODO
@@ -8,7 +8,6 @@ Missing Features:
  - Sparse memory

 Performance:
- - Sampling from HiZ (Nanley)
  - Multi-{sampled/gen8,LOD} HiZ
  - Compressed multisample support
  - Pushing pieces of UBOs?
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 7d5beeabbe..ee563685bb 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -538,10 +538,22 @@ anv_CreateImageView(VkDevice _device,
   iview->isl.usage = 0;
}

-   /* Sampling from HiZ is not yet enabled */
+   /* If the HiZ buffer can be sampled from, set the constant clear color.
+* If it cannot, disable the isl aux usage flag.
+*/
+   float red_clear_color = 0.0f;
enum isl_aux_usage surf_usage = image->aux_usage;
-   if (surf_usage == ISL_AUX_USAGE_HIZ)
-  surf_usage = ISL_AUX_USAGE_NONE;
+   if (image->aux_usage == ISL_AUX_USAGE_HIZ) {
+  if (anv_can_sample_with_hiz(device->info.gen, image->samples)) {
+ /* When a HiZ buffer is sampled on gen9+, ensure that
+  * the constant fast clear value is set in the surface state.
+  */
+ if (device->info.gen >= 9)
+red_clear_color = ANV_HZ_FC_VAL;
+  } else {
+ surf_usage = ISL_AUX_USAGE_NONE;
+  }
+   }

I realized this morning that this doesn't actually do everything we need.  If 
we're going to use a layout-based approach to HiZ, we need two surface states, 
one with HiZ and one without and we need to select which one to put in the 
binding table based on layout.  Otherwise, if they sample from an image in 
LAYOUT_GENERAL and the HiZ data doesn't match the depth data, we'll get the 
wrong values.  I don't think this will be all that hard to solve, but we should 
solve it.


/* Input attachment surfaces for color are allocated and filled
 * out at BeginRenderPass time because they need compression information.
@@ -560,6 +572,7 @@ anv_CreateImageView(VkDevice _device,
   iview->sampler_surface_state.map,
   .surf = >isl,
   .view = ,
+  .clear_color.f32 = { red_clear_color,},
   .aux_surf = >aux_surface.isl,
   .aux_usage = surf_usage,
   .mocs = device->default_mocs);
--
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/10] drm/i915: Add engine reset count in get-reset-stats ioctl

2017-01-12 Thread Michel Thierry
Users/tests relying on the total reset count will start seeing a smaller
number since most of the hangs can be handled by engine reset.
Note that if engine x is reset, context a running on engine y will be unaware
and unaffected.

To start the discussion, include just a total engine reset count. If it
is deemed useful, it can be extended to report each engine separately.

v2: s/engine_reset/reset_engine/.

Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: mesa-dev@lists.freedesktop.org
Signed-off-by: Michel Thierry 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 14 +++---
 include/uapi/drm/i915_drm.h |  3 ++-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 227b428a1202..ae55e10284f0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1284,9 +1284,11 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device 
*dev,
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_reset_stats *args = data;
struct i915_gem_context *ctx;
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
int ret;
 
-   if (args->flags || args->pad)
+   if (args->flags)
return -EINVAL;
 
if (args->ctx_id == DEFAULT_CONTEXT_HANDLE && !capable(CAP_SYS_ADMIN))
@@ -1302,10 +1304,16 @@ int i915_gem_context_reset_stats_ioctl(struct 
drm_device *dev,
return PTR_ERR(ctx);
}
 
-   if (capable(CAP_SYS_ADMIN))
+   if (capable(CAP_SYS_ADMIN)) {
args->reset_count = i915_reset_count(_priv->gpu_error);
-   else
+   for_each_engine(engine, dev_priv, id)
+   args->reset_engine_count +=
+   i915_reset_engine_count(_priv->gpu_error,
+   engine);
+   } else {
args->reset_count = 0;
+   args->reset_engine_count = 0;
+   }
 
args->batch_active = ctx->guilty_count;
args->batch_pending = ctx->active_count;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 1110e628c239..58551b06409d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1201,7 +1201,8 @@ struct drm_i915_reset_stats {
/* Number of batches lost pending for execution, for this context */
__u32 batch_pending;
 
-   __u32 pad;
+   /* Number of engine resets since boot/module reload, for all contexts */
+   __u32 reset_engine_count;
 };
 
 struct drm_i915_gem_userptr {
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 21/21] anv: Avoid some resolves for samplable HiZ buffers

2017-01-12 Thread Nanley Chery
v2: Simplify nested ifs (Jason Ekstrand)

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/genX_cmd_buffer.c | 67 --
 1 file changed, 49 insertions(+), 18 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 59041f0458..f7894a0574 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -311,11 +311,21 @@ need_input_attachment_state(const struct 
anv_render_pass_attachment *att)
 }
 
 static enum isl_aux_usage
-layout_to_hiz_usage(VkImageLayout layout)
+layout_to_hiz_usage(VkImageLayout layout, uint8_t samples)
 {
switch (layout) {
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
   return ISL_AUX_USAGE_HIZ;
+   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+   case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+  if (anv_can_sample_with_hiz(GEN_GEN, samples))
+ return ISL_AUX_USAGE_HIZ;
+  /* Fall-through */
+   case VK_IMAGE_LAYOUT_GENERAL:
+  /* This buffer could be used as a source or destination in a transfer
+   * operation. Transfer operations currently don't perform HiZ-enabled reads
+   * and writes.
+   */
default:
   return ISL_AUX_USAGE_NONE;
}
@@ -333,28 +343,48 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
 {
assert(image);
 
-   if (image->aux_usage != ISL_AUX_USAGE_HIZ)
+   if (image->aux_usage != ISL_AUX_USAGE_HIZ || final_layout == initial_layout)
   return;
 
-   const bool hiz_enabled = layout_to_hiz_usage(initial_layout) ==
+   const bool hiz_enabled = layout_to_hiz_usage(initial_layout, 
image->samples) ==
 ISL_AUX_USAGE_HIZ;
-   const bool enable_hiz = layout_to_hiz_usage(final_layout) ==
+   const bool enable_hiz = layout_to_hiz_usage(final_layout, image->samples) ==
ISL_AUX_USAGE_HIZ;
 
-   /* We've already initialized the aux HiZ buffer at BindImageMemory time,
-* so there's no need to perform a HIZ resolve or clear to avoid GPU hangs.
-* This initial layout indicates that the user doesn't care about the data
-* that's currently in the buffer, so no resolves are necessary.
-*/
-   if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED)
-  return;
-
-   if (hiz_enabled == enable_hiz) {
-  /* The same buffer will be used, no resolves are necessary */
+   enum blorp_hiz_op hiz_op;
+   if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
+  /* We've already initialized the aux HiZ buffer at BindImageMemory time,
+   * so there's no need to perform a HIZ resolve or clear to avoid GPU 
hangs.
+   * This initial layout indicates that the user doesn't care about the 
data
+   * that's currently in the buffer, so resolves are not necessary except
+   * for the special case noted below.
+   */
+  hiz_op = BLORP_HIZ_OP_NONE;
} else if (hiz_enabled && !enable_hiz) {
-  anv_gen8_hiz_op_resolve(cmd_buffer, image, BLORP_HIZ_OP_DEPTH_RESOLVE);
+  hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
+   } else if (!hiz_enabled && enable_hiz) {
+  hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
} else {
-  assert(!hiz_enabled && enable_hiz);
+  assert(hiz_enabled == enable_hiz);
+  /* If the same buffer will be used, no resolves are necessary except for
+   * the special case noted below.
+   */
+  hiz_op = BLORP_HIZ_OP_NONE;
+   }
+
+   if (hiz_op != BLORP_HIZ_OP_NONE)
+  anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
+
+   /* Images that have sampling with HiZ enabled cause all shader sampling to
+* load data with the HiZ buffer. Therefore, in the case of transitioning to
+* the general layout - which currently routes all writes to the depth
+* buffer - we must ensure that the HiZ buffer remains consistent with the
+* depth buffer by performing an additional HIZ resolve if the operation
+* required by this transition was not already a HiZ resolve.
+*/
+   if (final_layout == VK_IMAGE_LAYOUT_GENERAL &&
+   anv_can_sample_with_hiz(GEN_GEN, image->samples) &&
+   hiz_op != BLORP_HIZ_OP_HIZ_RESOLVE) {
   anv_gen8_hiz_op_resolve(cmd_buffer, image, BLORP_HIZ_OP_HIZ_RESOLVE);
}
 }
@@ -512,7 +542,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer 
*cmd_buffer,
  } else {
 if (iview->image->aux_usage == ISL_AUX_USAGE_HIZ) {
state->attachments[i].aux_usage =
-  layout_to_hiz_usage(att->initial_layout);
+  layout_to_hiz_usage(att->initial_layout, 
iview->image->samples);
 } else {
state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
 }
@@ -2312,7 +2342,8 @@ genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer 
*cmd_buffer,
   cmd_buffer->state.attachments[ds].current_layout =
  cmd_buffer->state.subpass->depth_stencil_layout;
   cmd_buffer->state.attachments[ds].aux_usage =
- 

[Mesa-dev] [PATCH v2 18/21] anv: Add a helper to determine sampling with HiZ

2017-01-12 Thread Nanley Chery
Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_private.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index b669d186ef..56aa6ef6e3 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1606,6 +1606,13 @@ struct anv_image {
struct anv_surface aux_surface;
 };
 
+/* Returns true if a HiZ-enabled depth buffer can be sampled from. */
+static inline bool
+anv_can_sample_with_hiz(uint8_t gen, uint32_t samples)
+{
+   return gen >= 8 && samples == 1;
+}
+
 void
 anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
 const struct anv_image *image,
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 19/21] anv/blorp: Don't fast depth clear samplable HiZ buffers on BDW

2017-01-12 Thread Nanley Chery
Avoid the resolves that would be required if fast depth clears were
allowed for such buffers.

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_blorp.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index b161400db5..2edd0712c9 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1271,6 +1271,15 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
*cmd_buffer)
 * ANV_HZ_FC_VAL.
 */
clear_with_hiz = false;
+} else if (gen == 8 &&
+   anv_can_sample_with_hiz(cmd_buffer->device->info.gen,
+   iview->image->samples)) {
+   /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
+* fast-cleared portion of a HiZ buffer. Testing has revealed
+* that Gen8 only supports returning 0.0f. Gens prior to gen8 do
+* not support this feature at all.
+*/
+   clear_with_hiz = false;
 }
  }
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 17/21] isl/surface_state: Handle ISL_AUX_USAGE_HIZ

2017-01-12 Thread Nanley Chery
v2: Remove redundant x/y offset asserts (Jason Ekstrand)

Signed-off-by: Nanley Chery 
---
 src/intel/isl/isl_surface_state.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/src/intel/isl/isl_surface_state.c 
b/src/intel/isl/isl_surface_state.c
index b9093cc951..b7354781cf 100644
--- a/src/intel/isl/isl_surface_state.c
+++ b/src/intel/isl/isl_surface_state.c
@@ -520,6 +520,26 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, 
void *state,
   s.AuxiliarySurfaceQPitch =
  isl_surf_get_array_pitch_sa_rows(info->aux_surf) >> 2;
   s.AuxiliarySurfaceBaseAddress = info->aux_address;
+
+  if (info->aux_usage == ISL_AUX_USAGE_HIZ) {
+ /* The number of samples must be 1 */
+ assert(info->surf->samples == 1);
+
+ /* The dimension must not be 3D */
+ assert(info->surf->dim != ISL_SURF_DIM_3D);
+
+ /* The format must be one of the following: */
+ switch (info->view->format) {
+ case ISL_FORMAT_R32_FLOAT:
+ case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
+ case ISL_FORMAT_R16_UNORM:
+break;
+ default:
+assert(!"Incompatible HiZ Sampling format");
+break;
+ }
+  }
+
   s.AuxiliarySurfaceMode = isl_to_gen_aux_mode[info->aux_usage];
 #else
   assert(info->aux_usage == ISL_AUX_USAGE_MCS ||
@@ -548,6 +568,15 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, 
void *state,
  s.SamplerL2BypassModeDisable = true;
  break;
   default:
+ /* From the SKL PRM, Programming Note under Sampler Output Channel
+  * Mapping:
+  *
+  *If a surface has an associated HiZ Auxilliary surface, the
+  *Sampler L2 Bypass Mode Disable field in the RENDER_SURFACE_STATE
+  *must be set.
+  */
+ if (GEN_GEN >= 9 && info->aux_usage == ISL_AUX_USAGE_HIZ)
+s.SamplerL2BypassModeDisable = true;
  break;
   }
}
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 20/21] anv: Enable sampling from HiZ

2017-01-12 Thread Nanley Chery
v2: Restrict ISL_AUX_USAGE_HIZ to depth aspects

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/TODO|  1 -
 src/intel/vulkan/anv_image.c | 20 +---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO
index 37fd16b437..38acc0dd5b 100644
--- a/src/intel/vulkan/TODO
+++ b/src/intel/vulkan/TODO
@@ -8,7 +8,6 @@ Missing Features:
  - Sparse memory
 
 Performance:
- - Sampling from HiZ (Nanley)
  - Multi-{sampled/gen8,LOD} HiZ
  - Compressed multisample support
  - Pushing pieces of UBOs?
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index e21374b1ab..9e5cebe188 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -538,10 +538,23 @@ anv_CreateImageView(VkDevice _device,
   iview->isl.usage = 0;
}
 
-   /* Sampling from HiZ is not yet enabled */
+   /* If the HiZ buffer can be sampled from, set the constant clear color.
+* If it cannot, disable the isl aux usage flag.
+*/
+   float red_clear_color = 0.0f;
enum isl_aux_usage surf_usage = image->aux_usage;
-   if (surf_usage == ISL_AUX_USAGE_HIZ)
-  surf_usage = ISL_AUX_USAGE_NONE;
+   if (image->aux_usage == ISL_AUX_USAGE_HIZ) {
+  if (iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT &&
+  anv_can_sample_with_hiz(device->info.gen, image->samples)) {
+ /* When a HiZ buffer is sampled on gen9+, ensure that
+  * the constant fast clear value is set in the surface state.
+  */
+ if (device->info.gen >= 9)
+red_clear_color = ANV_HZ_FC_VAL;
+  } else {
+ surf_usage = ISL_AUX_USAGE_NONE;
+  }
+   }
 
/* Input attachment surfaces for color are allocated and filled
 * out at BeginRenderPass time because they need compression information.
@@ -560,6 +573,7 @@ anv_CreateImageView(VkDevice _device,
   iview->sampler_surface_state.map,
  .surf = &surface->isl,
  .view = &view,
+  .clear_color.f32 = { red_clear_color,},
  .aux_surf = &image->aux_surface.isl,
   .aux_usage = surf_usage,
   .mocs = device->default_mocs);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 05/21] anv: Enable HiZ support for multiple subpasses

2017-01-12 Thread Nanley Chery
We'll be using layout transitions later on in the series which can occur
within and between subpasses. Turn this on now to simplify the change
later.

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/TODO  |  2 +-
 src/intel/vulkan/gen8_cmd_buffer.c | 11 +--
 src/intel/vulkan/genX_cmd_buffer.c |  8 ++--
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO
index 5c33725700..37fd16b437 100644
--- a/src/intel/vulkan/TODO
+++ b/src/intel/vulkan/TODO
@@ -9,7 +9,7 @@ Missing Features:
 
 Performance:
  - Sampling from HiZ (Nanley)
- - Multi-{sampled/gen8,LOD,subpass} HiZ
+ - Multi-{sampled/gen8,LOD} HiZ
  - Compressed multisample support
  - Pushing pieces of UBOs?
  - Enable guardband clipping
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c 
b/src/intel/vulkan/gen8_cmd_buffer.c
index 892a035304..81d7727130 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -340,10 +340,6 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer 
*cmd_buffer,
if (iview == NULL || iview->image->aux_usage != ISL_AUX_USAGE_HIZ)
   return;
 
-   /* FINISHME: Implement multi-subpass HiZ */
-   if (cmd_buffer->state.pass->subpass_count > 1)
-  return;
-
const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
 
/* Section 7.4. of the Vulkan 1.0.27 spec states:
@@ -366,6 +362,8 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer 
*cmd_buffer,
/* This variable corresponds to the Pixel Dim column in the table below */
struct isl_extent2d px_dim;
 
+   const uint32_t subpass_idx = cmd_state->subpass - 
cmd_state->pass->subpasses;
+
/* Validate that we can perform the HZ operation and that it's necessary. */
switch (op) {
case BLORP_HIZ_OP_DEPTH_CLEAR:
@@ -446,7 +444,8 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer 
*cmd_buffer,
   break;
case BLORP_HIZ_OP_DEPTH_RESOLVE:
   if (cmd_buffer->state.pass->attachments[ds].store_op !=
-  VK_ATTACHMENT_STORE_OP_STORE)
+  VK_ATTACHMENT_STORE_OP_STORE &&
+  subpass_idx == cmd_state->pass->subpass_count - 1)
  return;
   break;
case BLORP_HIZ_OP_HIZ_RESOLVE:
@@ -461,7 +460,7 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer 
*cmd_buffer,
*/
   if (full_surface_op &&
   cmd_buffer->state.pass->attachments[ds].load_op !=
-  VK_ATTACHMENT_LOAD_OP_LOAD)
+  VK_ATTACHMENT_LOAD_OP_LOAD && subpass_idx == 0)
  return;
   break;
case BLORP_HIZ_OP_NONE:
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 47d3322e48..b670d00e2d 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2105,12 +2105,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
*cmd_buffer)
 depth_stencil_surface_type(image->depth_surface.isl.dim);
  db.DepthWriteEnable  = true;
  db.StencilWriteEnable= has_stencil;
-
- if (cmd_buffer->state.pass->subpass_count == 1) {
-db.HierarchicalDepthBufferEnable = has_hiz;
- } else {
-anv_finishme("Multiple-subpass HiZ not implemented");
- }
+ db.HierarchicalDepthBufferEnable = has_hiz;
 
  db.SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev,
   
&image->depth_surface.isl);
@@ -2287,6 +2282,7 @@ void genX(CmdNextSubpass)(
 
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
 
+   genX(cmd_buffer_emit_hz_op)(cmd_buffer, BLORP_HIZ_OP_DEPTH_RESOLVE);
anv_cmd_buffer_resolve_subpass(cmd_buffer);
genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1);
 }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 15/21] anv: Disable HiZ for input attachments

2017-01-12 Thread Nanley Chery
v2 (Jason Ekstrand):
- Add spec citation
- Drop conditional

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_image.c   | 23 +++
 src/intel/vulkan/genX_cmd_buffer.c | 17 +
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index d821629191..e21374b1ab 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -182,6 +182,20 @@ make_surface(const struct anv_device *dev,
*/
   if (!(image->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
  /* It will never be used as an attachment, HiZ is pointless. */
+  } else if (image->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
+ /* From the 1.0.37 spec:
+  *
+  *"An attachment used as an input attachment and depth/stencil
+  *attachment must be in either VK_IMAGE_LAYOUT_GENERAL or
+  *VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL."
+  *
+  * It will never have a layout of
+  * VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, so HiZ is
+  * currently pointless. If transfer operations learn to use the HiZ
+  * buffer, we can enable HiZ for VK_IMAGE_LAYOUT_GENERAL and support
+  * input attachments.
+  */
+ anv_finishme("Implement HiZ for input attachments");
   } else if (!env_var_as_boolean("INTEL_VK_HIZ", dev->info.gen >= 8)) {
  anv_finishme("Implement gen7 HiZ");
   } else if (vk_info->mipLevels > 1) {
@@ -529,14 +543,15 @@ anv_CreateImageView(VkDevice _device,
if (surf_usage == ISL_AUX_USAGE_HIZ)
   surf_usage = ISL_AUX_USAGE_NONE;
 
-   /* Input attachment surfaces for color or depth are allocated and filled
+   /* Input attachment surfaces for color are allocated and filled
 * out at BeginRenderPass time because they need compression information.
-* Stencil image do not support compression so we just use the texture
-* surface from the image view.
+* Compression is not yet enabled for depth textures and stencil doesn't
+* allow compression so we can just use the texture surface state from the
+* view.
 */
if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT ||
(image->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT &&
-(iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT))) {
+!(iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT))) {
   iview->sampler_surface_state = alloc_surface_state(device);
 
   struct isl_view view = iview->isl;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index baa932e517..05f29ee57c 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -303,11 +303,11 @@ need_input_attachment_state(const struct 
anv_render_pass_attachment *att)
if (!(att->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
   return false;
 
-   /* We only allocate input attachment states for color and depth surfaces.
-* Stencil doesn't allow compression so we can just use the texture surface
-* state from the view
+   /* We only allocate input attachment states for color surfaces. Compression
+* is not yet enabled for depth textures and stencil doesn't allow
+* compression so we can just use the texture surface state from the view.
 */
-   return vk_format_is_color(att->format) || vk_format_has_depth(att->format);
+   return vk_format_is_color(att->format);
 }
 
 static enum isl_aux_usage
@@ -515,18 +515,11 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer 
*cmd_buffer,
  }
 
  if (need_input_attachment_state(&pass->attachments[i])) {
-const struct isl_surf *surf;
-if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
-   surf = &iview->image->color_surface.isl;
-} else {
-   surf = &iview->image->depth_surface.isl;
-}
-
 struct isl_view view = iview->isl;
 view.usage |= ISL_SURF_USAGE_TEXTURE_BIT;
 isl_surf_fill_state(isl_dev,
 state->attachments[i].input_att_state.map,
-.surf = surf,
+.surf = &iview->image->color_surface.isl,
 .view = &view,
 .aux_surf = &iview->image->aux_surface.isl,
 .aux_usage = 
state->attachments[i].input_aux_usage,
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 09/21] anv: Use the gen8 BLORP HiZ resolving function

2017-01-12 Thread Nanley Chery
Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/genX_cmd_buffer.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 63f6be12a8..74369f6ba1 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2248,8 +2248,15 @@ genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer 
*cmd_buffer,
 
cmd_buffer->state.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
 
+   const struct anv_image_view *iview =
+  anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+
+   if (iview) {
+  anv_gen8_hiz_op_resolve(cmd_buffer, iview->image,
+  BLORP_HIZ_OP_HIZ_RESOLVE);
+   }
+
cmd_buffer_emit_depth_stencil(cmd_buffer);
-   genX(cmd_buffer_emit_hz_op)(cmd_buffer, BLORP_HIZ_OP_HIZ_RESOLVE);
 
anv_cmd_buffer_clear_subpass(cmd_buffer);
 }
@@ -2281,7 +2288,14 @@ void genX(CmdNextSubpass)(
 
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
 
-   genX(cmd_buffer_emit_hz_op)(cmd_buffer, BLORP_HIZ_OP_DEPTH_RESOLVE);
+   const struct anv_image_view *iview =
+  anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+
+   if (iview) {
+  anv_gen8_hiz_op_resolve(cmd_buffer, iview->image,
+  BLORP_HIZ_OP_DEPTH_RESOLVE);
+   }
+
anv_cmd_buffer_resolve_subpass(cmd_buffer);
genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1);
 }
@@ -2291,7 +2305,14 @@ void genX(CmdEndRenderPass)(
 {
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   genX(cmd_buffer_emit_hz_op)(cmd_buffer, BLORP_HIZ_OP_DEPTH_RESOLVE);
+   const struct anv_image_view *iview =
+  anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+
+   if (iview) {
+  anv_gen8_hiz_op_resolve(cmd_buffer, iview->image,
+  BLORP_HIZ_OP_DEPTH_RESOLVE);
+   }
+
anv_cmd_buffer_resolve_subpass(cmd_buffer);
 
 #ifndef NDEBUG
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 11/21] anv: Add helpers to handle depth buffer layout transitions

2017-01-12 Thread Nanley Chery
Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/genX_cmd_buffer.c | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 74369f6ba1..fff9bd37c0 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -310,6 +310,56 @@ need_input_attachment_state(const struct 
anv_render_pass_attachment *att)
return vk_format_is_color(att->format) || vk_format_has_depth(att->format);
 }
 
+static enum isl_aux_usage
+layout_to_hiz_usage(VkImageLayout layout)
+{
+   switch (layout) {
+   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+  return ISL_AUX_USAGE_HIZ;
+   default:
+  return ISL_AUX_USAGE_NONE;
+   }
+}
+
+/* Transitions a HiZ-enabled depth buffer from one layout to another. Unless
+ * the initial layout is undefined, the HiZ buffer and depth buffer will
+ * represent the same data at the end of this operation.
+ */
+static void
+transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
+const struct anv_image *image,
+VkImageLayout initial_layout,
+VkImageLayout final_layout)
+{
+   assert(image);
+
+   if (image->aux_usage != ISL_AUX_USAGE_HIZ)
+  return;
+
+   const bool hiz_enabled = layout_to_hiz_usage(initial_layout) ==
+ISL_AUX_USAGE_HIZ;
+   const bool enable_hiz = layout_to_hiz_usage(final_layout) ==
+   ISL_AUX_USAGE_HIZ;
+
+   /* We've already initialized the aux HiZ buffer at BindImageMemory time,
+* so there's no need to perform a HIZ resolve or clear to avoid GPU hangs.
+* This initial layout indicates that the user doesn't care about the data
+* that's currently in the buffer, so no resolves are necessary.
+*/
+   if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED)
+  return;
+
+   if (hiz_enabled == enable_hiz) {
+  /* The same buffer will be used, no resolves are necessary */
+   } else if (hiz_enabled && !enable_hiz) {
+  anv_gen8_hiz_op_resolve(cmd_buffer, image, BLORP_HIZ_OP_DEPTH_RESOLVE);
+   } else {
+  assert(!hiz_enabled && enable_hiz);
+  anv_gen8_hiz_op_resolve(cmd_buffer, image, BLORP_HIZ_OP_HIZ_RESOLVE);
+   }
+}
+
+
 /**
  * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass.
  */
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 16/21] anv: Perform HiZ resolves only on layout transitions

2017-01-12 Thread Nanley Chery
This is a better mapping to the Vulkan API and improves performance in
all tested workloads.

v2: Remove unnecessary image view aspect checks (Jason Ekstrand)

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_blorp.c   | 48 ++--
 src/intel/vulkan/genX_cmd_buffer.c | 50 ++
 2 files changed, 42 insertions(+), 56 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 4394323a6b..b161400db5 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1580,52 +1580,8 @@ anv_gen8_hiz_op_resolve(struct anv_cmd_buffer 
*cmd_buffer,
image->aux_usage != ISL_AUX_USAGE_HIZ)
   return;
 
-   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
-   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
-
-   /* Section 7.4. of the Vulkan 1.0.27 spec states:
-*
-*   "The render area must be contained within the framebuffer dimensions."
-*
-* Therefore, the only way the extent of the render area can match that of
-* the image view is if the render area offset equals (0, 0).
-*/
-   const bool full_surface_op =
- cmd_state->render_area.extent.width == image->extent.width &&
- cmd_state->render_area.extent.height == image->extent.height;
-   if (full_surface_op)
-  assert(cmd_state->render_area.offset.x == 0 &&
- cmd_state->render_area.offset.y == 0);
-
-   /* Check the subpass index to determine if skipping a resolve is allowed */
-   const uint32_t subpass_idx = cmd_state->subpass - 
cmd_state->pass->subpasses;
-   switch (op) {
-   case BLORP_HIZ_OP_DEPTH_RESOLVE:
-  if (cmd_buffer->state.pass->attachments[ds].store_op !=
-  VK_ATTACHMENT_STORE_OP_STORE &&
-  subpass_idx == cmd_state->pass->subpass_count - 1)
- return;
-  break;
-   case BLORP_HIZ_OP_HIZ_RESOLVE:
-  /* If the render area covers the entire surface *and* load_op is either
-   * CLEAR or DONT_CARE then the previous contents of the depth buffer
-   * will be entirely discarded.  In this case, we can skip the HiZ
-   * resolve.
-   *
-   * If the render area is not the full surface, we need to do
-   * the resolve because otherwise data outside the render area may get
-   * garbled by the resolve at the end of the render pass.
-   */
-  if (full_surface_op &&
-  cmd_buffer->state.pass->attachments[ds].load_op !=
-  VK_ATTACHMENT_LOAD_OP_LOAD && subpass_idx == 0)
- return;
-  break;
-   case BLORP_HIZ_OP_DEPTH_CLEAR:
-   case BLORP_HIZ_OP_NONE:
-  unreachable("Invalid HiZ OP");
-   }
-
+   assert(op == BLORP_HIZ_OP_HIZ_RESOLVE ||
+  op == BLORP_HIZ_OP_DEPTH_RESOLVE);
 
struct blorp_batch batch;
blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 05f29ee57c..59041f0458 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -510,7 +510,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer 
*cmd_buffer,
   state->attachments[i].aux_usage,
   state->attachments[i].color_rt_state);
  } else {
-state->attachments[i].aux_usage = iview->image->aux_usage;
+if (iview->image->aux_usage == ISL_AUX_USAGE_HIZ) {
+   state->attachments[i].aux_usage =
+  layout_to_hiz_usage(att->initial_layout);
+} else {
+   state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
+}
 state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE;
  }
 
@@ -910,6 +915,13 @@ void genX(CmdPipelineBarrier)(
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
   src_flags |= pImageMemoryBarriers[i].srcAccessMask;
   dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
+  ANV_FROM_HANDLE(anv_image, image, pImageMemoryBarriers[i].image);
+  if (pImageMemoryBarriers[i].subresourceRange.aspectMask &
+  VK_IMAGE_ASPECT_DEPTH_BIT) {
+ transition_depth_buffer(cmd_buffer, image,
+ pImageMemoryBarriers[i].oldLayout,
+ pImageMemoryBarriers[i].newLayout);
+  }
}
 
enum anv_pipe_bits pipe_bits = 0;
@@ -2292,9 +2304,15 @@ genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer 
*cmd_buffer,
const struct anv_image_view *iview =
   anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
 
-   if (iview) {
-  anv_gen8_hiz_op_resolve(cmd_buffer, iview->image,
-  BLORP_HIZ_OP_HIZ_RESOLVE);
+   if (iview && iview->image->aux_usage == ISL_AUX_USAGE_HIZ) {
+  const uint32_t ds = subpass->depth_stencil_attachment;
+  transition_depth_buffer(cmd_buffer, iview->image,
+  

[Mesa-dev] [PATCH v2 12/21] anv: Store depth stencil layouts

2017-01-12 Thread Nanley Chery
Store the current and requested depth stencil layouts so that we can
perform the appropriate HiZ resolves for a given transition while
recording a render pass.

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_pass.c|  5 +
 src/intel/vulkan/anv_private.h | 11 +++
 src/intel/vulkan/genX_cmd_buffer.c |  1 +
 3 files changed, 17 insertions(+)

diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
index c1c149b48b..ea86fa9ff2 100644
--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -74,6 +74,8 @@ VkResult anv_CreateRenderPass(
   att->load_op = pCreateInfo->pAttachments[i].loadOp;
   att->store_op = pCreateInfo->pAttachments[i].storeOp;
   att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
+  att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
+  att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
   att->subpass_usage = usages;
   usages += pass->subpass_count;
}
@@ -161,6 +163,8 @@ VkResult anv_CreateRenderPass(
   if (desc->pDepthStencilAttachment) {
  uint32_t a = desc->pDepthStencilAttachment->attachment;
  subpass->depth_stencil_attachment = a;
+ subpass->depth_stencil_layout =
+desc->pDepthStencilAttachment->layout;
  if (a != VK_ATTACHMENT_UNUSED) {
 pass->attachments[a].usage |=
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
@@ -168,6 +172,7 @@ VkResult anv_CreateRenderPass(
  }
   } else {
  subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED;
+ subpass->depth_stencil_layout = VK_IMAGE_LAYOUT_UNDEFINED;
   }
}
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 650c40c2d9..9e7e8cfe3d 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1097,6 +1097,7 @@ struct anv_attachment_state {
struct anv_state color_rt_state;
struct anv_state input_att_state;
 
+   VkImageLayoutcurrent_layout;
VkImageAspectFlags   pending_clear_aspects;
bool fast_clear;
VkClearValue clear_value;
@@ -1730,7 +1731,12 @@ struct anv_subpass {
uint32_t color_count;
uint32_t *   color_attachments;
uint32_t *   resolve_attachments;
+
+   /* TODO: Consider storing the depth/stencil VkAttachmentReference
+* instead of its two structure members (below) individually.
+*/
uint32_t depth_stencil_attachment;
+   VkImageLayoutdepth_stencil_layout;
 
/** Subpass has a depth/stencil self-dependency */
bool has_ds_self_dep;
@@ -1747,12 +1753,17 @@ enum anv_subpass_usage {
 };
 
 struct anv_render_pass_attachment {
+   /* TODO: Consider using VkAttachmentDescription instead of storing each of
+* its members individually.
+*/
VkFormat format;
uint32_t samples;
VkImageUsageFlagsusage;
VkAttachmentLoadOp   load_op;
VkAttachmentStoreOp  store_op;
VkAttachmentLoadOp   stencil_load_op;
+   VkImageLayoutinitial_layout;
+   VkImageLayoutfinal_layout;
 
/* An array, indexed by subpass id, of how the attachment will be used. */
enum anv_subpass_usage * subpass_usage;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index fff9bd37c0..95d0cfc983 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -480,6 +480,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer 
*cmd_buffer,
 }
  }
 
+ state->attachments[i].current_layout = att->initial_layout;
  state->attachments[i].pending_clear_aspects = clear_aspects;
  if (clear_aspects)
 state->attachments[i].clear_value = begin->pClearValues[i];
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 08/21] anv/blorp: Add a gen8 HiZ op resolve function

2017-01-12 Thread Nanley Chery
Add an entry point for resolving using BLORP's gen8 HiZ op function.

v2: Manually add the aux info

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_blorp.c   | 83 ++
 src/intel/vulkan/anv_private.h |  5 +++
 2 files changed, 88 insertions(+)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index fdea2d0a23..f7aaa22554 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1559,3 +1559,86 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer 
*cmd_buffer)
 
blorp_batch_finish();
 }
+
+void
+anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
+const struct anv_image *image,
+enum blorp_hiz_op op)
+{
+   assert(image);
+
+   /* Don't resolve depth buffers without an auxiliary HiZ buffer and
+* don't perform such a resolve on gens that don't support it.
+*/
+   if (cmd_buffer->device->info.gen < 8 ||
+   image->aux_usage != ISL_AUX_USAGE_HIZ)
+  return;
+
+   const struct anv_cmd_state *cmd_state = _buffer->state;
+   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
+
+   /* Section 7.4. of the Vulkan 1.0.27 spec states:
+*
+*   "The render area must be contained within the framebuffer dimensions."
+*
+* Therefore, the only way the extent of the render area can match that of
+* the image view is if the render area offset equals (0, 0).
+*/
+   const bool full_surface_op =
+ cmd_state->render_area.extent.width == image->extent.width &&
+ cmd_state->render_area.extent.height == image->extent.height;
+   if (full_surface_op)
+  assert(cmd_state->render_area.offset.x == 0 &&
+ cmd_state->render_area.offset.y == 0);
+
+   /* Check the subpass index to determine if skipping a resolve is allowed */
+   const uint32_t subpass_idx = cmd_state->subpass - 
cmd_state->pass->subpasses;
+   switch (op) {
+   case BLORP_HIZ_OP_DEPTH_RESOLVE:
+  if (cmd_buffer->state.pass->attachments[ds].store_op !=
+  VK_ATTACHMENT_STORE_OP_STORE &&
+  subpass_idx == cmd_state->pass->subpass_count - 1)
+ return;
+  break;
+   case BLORP_HIZ_OP_HIZ_RESOLVE:
+  /* If the render area covers the entire surface *and* load_op is either
+   * CLEAR or DONT_CARE then the previous contents of the depth buffer
+   * will be entirely discarded.  In this case, we can skip the HiZ
+   * resolve.
+   *
+   * If the render area is not the full surface, we need to do
+   * the resolve because otherwise data outside the render area may get
+   * garbled by the resolve at the end of the render pass.
+   */
+  if (full_surface_op &&
+  cmd_buffer->state.pass->attachments[ds].load_op !=
+  VK_ATTACHMENT_LOAD_OP_LOAD && subpass_idx == 0)
+ return;
+  break;
+   case BLORP_HIZ_OP_DEPTH_CLEAR:
+   case BLORP_HIZ_OP_NONE:
+  unreachable("Invalid HiZ OP");
+   }
+
+
+   struct blorp_batch batch;
+   blorp_batch_init(_buffer->device->blorp, , cmd_buffer, 0);
+
+   struct blorp_surf surf;
+   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
+ISL_AUX_USAGE_NONE, );
+
+   /* Manually add the aux HiZ surf */
+   surf.aux_surf = >aux_surface.isl,
+   surf.aux_addr = (struct blorp_address) {
+  .buffer = image->bo,
+  .offset = image->offset + image->aux_surface.offset,
+   };
+   surf.aux_usage = ISL_AUX_USAGE_HIZ;
+
+   surf.clear_color.u32[0] = (uint32_t)
+  cmd_state->attachments[ds].clear_value.depthStencil.depth;
+
+   blorp_gen6_hiz_op(, , 0, 0, op);
+   blorp_batch_finish();
+}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 26f2986005..650c40c2d9 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1590,6 +1590,11 @@ struct anv_image {
struct anv_surface aux_surface;
 };
 
+void
+anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
+const struct anv_image *image,
+enum blorp_hiz_op op);
+
 static inline uint32_t
 anv_get_layerCount(const struct anv_image *image,
const VkImageSubresourceRange *range)
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 10/21] anv: Delete anv's HiZ op emit function

2017-01-12 Thread Nanley Chery
This is no longer used.

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_genX.h|   3 -
 src/intel/vulkan/gen7_cmd_buffer.c |   7 --
 src/intel/vulkan/gen8_cmd_buffer.c | 223 -
 3 files changed, 233 deletions(-)

diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 35ee3bb380..d04fe38a51 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -61,9 +61,6 @@ genX(emit_urb_setup)(struct anv_device *device, struct 
anv_batch *batch,
  VkShaderStageFlags active_stages,
  const unsigned entry_size[4]);
 
-void genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer *cmd_buffer,
-   enum blorp_hiz_op op);
-
 void genX(cmd_buffer_gpu_memcpy)(struct anv_cmd_buffer *cmd_buffer,
  struct anv_bo *dst, uint32_t dst_offset,
  struct anv_bo *src, uint32_t src_offset,
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c 
b/src/intel/vulkan/gen7_cmd_buffer.c
index 38e400b2d1..8d68aba9c9 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -256,13 +256,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer 
*cmd_buffer)
cmd_buffer->state.dirty = 0;
 }
 
-void
-genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer *cmd_buffer,
-  enum blorp_hiz_op op)
-{
-   anv_finishme("Implement Gen7 HZ ops");
-}
-
 void genX(CmdSetEvent)(
 VkCommandBuffer commandBuffer,
 VkEvent event,
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c 
b/src/intel/vulkan/gen8_cmd_buffer.c
index 81d7727130..f22037b570 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -322,229 +322,6 @@ void genX(CmdBindIndexBuffer)(
cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
 }
 
-
-/**
- * Emit the HZ_OP packet in the sequence specified by the BDW PRM section
- * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear."
- *
- * \todo Enable Stencil Buffer-only clears
- */
-void
-genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer *cmd_buffer,
-  enum blorp_hiz_op op)
-{
-   struct anv_cmd_state *cmd_state = _buffer->state;
-   const struct anv_image_view *iview =
-  anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
-
-   if (iview == NULL || iview->image->aux_usage != ISL_AUX_USAGE_HIZ)
-  return;
-
-   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
-
-   /* Section 7.4. of the Vulkan 1.0.27 spec states:
-*
-*   "The render area must be contained within the framebuffer dimensions."
-*
-* Therefore, the only way the extent of the render area can match that of
-* the image view is if the render area offset equals (0, 0).
-*/
-   const bool full_surface_op =
- cmd_state->render_area.extent.width == iview->extent.width &&
- cmd_state->render_area.extent.height == iview->extent.height;
-   if (full_surface_op)
-  assert(cmd_state->render_area.offset.x == 0 &&
- cmd_state->render_area.offset.y == 0);
-
-   bool depth_clear;
-   bool stencil_clear;
-
-   /* This variable corresponds to the Pixel Dim column in the table below */
-   struct isl_extent2d px_dim;
-
-   const uint32_t subpass_idx = cmd_state->subpass - 
cmd_state->pass->subpasses;
-
-   /* Validate that we can perform the HZ operation and that it's necessary. */
-   switch (op) {
-   case BLORP_HIZ_OP_DEPTH_CLEAR:
-  stencil_clear = VK_IMAGE_ASPECT_STENCIL_BIT &
-  cmd_state->attachments[ds].pending_clear_aspects;
-  depth_clear = VK_IMAGE_ASPECT_DEPTH_BIT &
-cmd_state->attachments[ds].pending_clear_aspects;
-
-  /* Apply alignment restrictions. Despite the BDW PRM mentioning this is
-   * only needed for a depth buffer surface type of D16_UNORM, testing
-   * showed it to be necessary for other depth formats as well
-   * (e.g., D32_FLOAT).
-   */
-#if GEN_GEN == 8
-  /* Pre-SKL, HiZ has an 8x4 sample block. As the number of samples
-   * increases, the number of pixels representable by this block
-   * decreases by a factor of the sample dimensions. Sample dimensions
-   * scale following the MSAA interleaved pattern.
-   *
-   * Sample|Sample|Pixel
-   * Count |Dim   |Dim
-   * ===
-   *1  | 1x1  | 8x4
-   *2  | 2x1  | 4x4
-   *4  | 2x2  | 4x2
-   *8  | 4x2  | 2x2
-   *   16  | 4x4  | 2x1
-   *
-   * Table: Pixel Dimensions in a HiZ Sample Block Pre-SKL
-   */
-  /* This variable corresponds to the Sample Dim column in the table
-   * above.
-   */
-  const struct isl_extent2d sa_dim =
- isl_get_interleaved_msaa_px_size_sa(iview->image->samples);
-  px_dim.w = 8 / 

[Mesa-dev] [PATCH v2 13/21] anv: Prepare for transitioning to the requested final layout

2017-01-12 Thread Nanley Chery
Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_pass.c| 3 +++
 src/intel/vulkan/anv_private.h | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
index ea86fa9ff2..5df6330c6a 100644
--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -118,6 +118,7 @@ VkResult anv_CreateRenderPass(
 subpass->input_attachments[j] = a;
 pass->attachments[a].usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
 pass->attachments[a].subpass_usage[i] |= ANV_SUBPASS_USAGE_INPUT;
+pass->attachments[a].last_subpass_idx = i;
 
 if (desc->pDepthStencilAttachment &&
 a == desc->pDepthStencilAttachment->attachment)
@@ -134,6 +135,7 @@ VkResult anv_CreateRenderPass(
 subpass->color_attachments[j] = a;
 pass->attachments[a].usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
 pass->attachments[a].subpass_usage[i] |= ANV_SUBPASS_USAGE_DRAW;
+pass->attachments[a].last_subpass_idx = i;
  }
   }
 
@@ -156,6 +158,7 @@ VkResult anv_CreateRenderPass(
   ANV_SUBPASS_USAGE_RESOLVE_SRC;
pass->attachments[a].subpass_usage[i] |=
   ANV_SUBPASS_USAGE_RESOLVE_DST;
+   pass->attachments[a].last_subpass_idx = i;
 }
  }
   }
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 9e7e8cfe3d..80e451b96e 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1767,6 +1767,9 @@ struct anv_render_pass_attachment {
 
/* An array, indexed by subpass id, of how the attachment will be used. */
enum anv_subpass_usage * subpass_usage;
+
+   /* The subpass id in which the attachment will be used last. */
+   uint32_t last_subpass_idx;
 };
 
 struct anv_render_pass {
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 14/21] anv: Avoid resolves incurred by fast depth clears

2017-01-12 Thread Nanley Chery
Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_blorp.c   |  9 +++--
 src/intel/vulkan/anv_private.h | 15 +++
 src/intel/vulkan/genX_cmd_buffer.c |  5 +
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index f7aaa22554..4394323a6b 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1265,6 +1265,12 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
*cmd_buffer)
render_area.offset.y +
render_area.extent.height)) {
clear_with_hiz = false;
+} else if (clear_att.clearValue.depthStencil.depth !=
+   ANV_HZ_FC_VAL) {
+   /* Don't enable fast depth clears for any color not equal to
+* ANV_HZ_FC_VAL.
+*/
+   clear_with_hiz = false;
 }
  }
 
@@ -1636,8 +1642,7 @@ anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
};
surf.aux_usage = ISL_AUX_USAGE_HIZ;
 
-   surf.clear_color.u32[0] = (uint32_t)
-  cmd_state->attachments[ds].clear_value.depthStencil.depth;
+   surf.clear_color.u32[0] = (uint32_t) ANV_HZ_FC_VAL;
 
blorp_gen6_hiz_op(, , 0, 0, op);
blorp_batch_finish();
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 80e451b96e..b669d186ef 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -72,6 +72,21 @@ struct gen_l3_config;
 extern "C" {
 #endif
 
+/* Allowing different clear colors requires us to perform a depth resolve at
+ * the end of certain render passes. This is because while slow clears store
+ * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
+ * See the PRMs for examples describing when additional resolves would be
+ * necessary. To enable fast clears without requiring extra resolves, we set
+ * the clear value to a globally-defined one. We could allow different values
+ * if the user doesn't expect coherent data during or after a render pass
+ * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
+ * don't seem to exist yet. In almost all Vulkan applications tested thus far,
+ * 1.0f seems to be the only value used. The only application that doesn't set
+ * this value does so through the usage of a seemingly uninitialized clear
+ * value.
+ */
+#define ANV_HZ_FC_VAL 1.0f
+
 #define MAX_VBS 32
 #define MAX_SETS 8
 #define MAX_RTS  8
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 95d0cfc983..baa932e517 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2283,10 +2283,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
*cmd_buffer)
anv_batch_emit(_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS), cp) {
   if (has_hiz) {
  cp.DepthClearValueValid = true;
- const uint32_t ds =
-cmd_buffer->state.subpass->depth_stencil_attachment;
- cp.DepthClearValue =
-cmd_buffer->state.attachments[ds].clear_value.depthStencil.depth;
+ cp.DepthClearValue = ANV_HZ_FC_VAL;
   }
}
 }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 04/21] anv: Use ::anv_attachment_state for toggling HiZ per subpass

2017-01-12 Thread Nanley Chery
We're about to enable HiZ support for multiple subpasses. Use this field
to keep track of whether or not subpass operations should treat the
depth buffer as having an auxiliary HiZ buffer.

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/genX_cmd_buffer.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index a372e6420f..47d3322e48 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -459,7 +459,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer 
*cmd_buffer,
   state->attachments[i].aux_usage,
   state->attachments[i].color_rt_state);
  } else {
-state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
+state->attachments[i].aux_usage = iview->image->aux_usage;
 state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE;
  }
 
@@ -2087,7 +2087,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
*cmd_buffer)
   anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
const struct anv_image *image = iview ? iview->image : NULL;
const bool has_depth = image && (image->aspects & 
VK_IMAGE_ASPECT_DEPTH_BIT);
-   const bool has_hiz = image != NULL && image->aux_usage == ISL_AUX_USAGE_HIZ;
+   const uint32_t ds = cmd_buffer->state.subpass->depth_stencil_attachment;
+   const bool has_hiz = image != NULL &&
+  cmd_buffer->state.attachments[ds].aux_usage == ISL_AUX_USAGE_HIZ;
const bool has_stencil =
   image && (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 01/21] intel/blorp: Add the BDW+ optimized HZ_OP sequence to BLORP

2017-01-12 Thread Nanley Chery
We'll be switching to layout-transition based resolves which can occur
outside of a render pass. Add this sequence to BLORP, as using BLORP
will enable emitting depth stencil state outside of a render pass (among
other benefits). The depth buffer extent is ignored to enable eventual
usage in VkCmdClearAttachments().

Signed-off-by: Nanley Chery 
---
 src/intel/blorp/blorp_genX_exec.h | 87 +++
 1 file changed, 87 insertions(+)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index 66906fabbc..a673ab8141 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -1237,6 +1237,86 @@ blorp_emit_3dstate_multisample(struct blorp_batch *batch,
}
 }
 
+#if GEN_GEN >= 8
+/* Emits the Optimized HiZ sequence specified in the BDW+ PRMs. The
+ * depth/stencil buffer extents are ignored to handle APIs which perform
+ * clearing operations without such information.
+ * */
+static void
+blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
+   const struct blorp_params *params)
+{
+   /* We should be performing an operation on a depth or stencil buffer.
+*/
+   assert(params->depth.enabled || params->stencil.enabled);
+
+   /* The stencil buffer should only be enabled if a fast clear operation is
+* requested.
+*/
+   if (params->stencil.enabled)
+  assert(params->hiz_op == BLORP_HIZ_OP_DEPTH_CLEAR);
+
+   /* If we can't alter the depth stencil config and multiple layers are
+* involved, the HiZ op will fail. This is because the op requires that a
+* new config is emitted for each additional layer.
+*/
+   if (batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL) {
+  assert(params->num_layers <= 1);
+   } else {
+  blorp_emit_depth_stencil_config(batch, params);
+   }
+
+   blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp) {
+  switch (params->hiz_op) {
+  case BLORP_HIZ_OP_DEPTH_CLEAR:
+ hzp.StencilBufferClearEnable = params->stencil.enabled;
+ hzp.DepthBufferClearEnable = params->depth.enabled;
+ hzp.StencilClearValue = params->stencil_ref;
+ break;
+  case BLORP_HIZ_OP_DEPTH_RESOLVE:
+ hzp.DepthBufferResolveEnable = true;
+ break;
+  case BLORP_HIZ_OP_HIZ_RESOLVE:
+ hzp.HierarchicalDepthBufferResolveEnable = true;
+ break;
+  case BLORP_HIZ_OP_NONE:
+ unreachable("Invalid HIZ op");
+  }
+
+  hzp.NumberofMultisamples = ffs(params->num_samples) - 1;
+  hzp.SampleMask = 0x;
+
+  /* Due to a hardware issue, this bit MBZ */
+  assert(hzp.ScissorRectangleEnable == false);
+
+  /* Contrary to the HW docs both fields are inclusive */
+  hzp.ClearRectangleXMin = params->x0;
+  hzp.ClearRectangleYMin = params->y0;
+
+  /* Contrary to the HW docs both fields are exclusive */
+  hzp.ClearRectangleXMax = params->x1;
+  hzp.ClearRectangleYMax = params->y1;
+   }
+
+   /* PIPE_CONTROL w/ all bits clear except for “Post-Sync Operation” must set
+* to “Write Immediate Data” enabled.
+*/
+   blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
+  pc.PostSyncOperation = WriteImmediateData;
+   }
+
+   blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp);
+
+   /* Perform depth clear specific flushing */
+   if (params->hiz_op == BLORP_HIZ_OP_DEPTH_CLEAR && params->depth.enabled) {
+  blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
+ pc.DepthStallEnable = true;
+ pc.DepthCacheFlushEnable = true;
+  }
+   }
+}
+#endif
+
 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
 static void
 blorp_emit_viewport_state(struct blorp_batch *batch,
@@ -1283,6 +1363,13 @@ blorp_exec(struct blorp_batch *batch, const struct 
blorp_params *params)
uint32_t color_calc_state_offset = 0;
uint32_t depth_stencil_state_offset;
 
+#if GEN_GEN >= 8
+   if (params->hiz_op != BLORP_HIZ_OP_NONE) {
+  blorp_emit_gen8_hiz_op(batch, params);
+  return;
+   }
+#endif
+
blorp_emit_vertex_buffers(batch, params);
blorp_emit_vertex_elements(batch, params);
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 02/21] anv/blorp: Handle ISL_AUX_USAGE_HIZ

2017-01-12 Thread Nanley Chery
Prevent assert failures that would occur in the next patch.

v2: Don't remove asserts from blorp/blit (Jason Ekstrand)

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_blorp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index b431d6af48..7e157b4d26 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -172,7 +172,8 @@ get_blorp_surf_for_anv_image(const struct anv_image *image,
  enum isl_aux_usage aux_usage,
  struct blorp_surf *blorp_surf)
 {
-   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT)
+   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT ||
+   aux_usage == ISL_AUX_USAGE_HIZ)
   aux_usage = ISL_AUX_USAGE_NONE;
 
const struct anv_surface *surface =
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 03/21] anv: Replace anv_image_has_hiz() with ISL_AUX_USAGE_HIZ

2017-01-12 Thread Nanley Chery
The helper doesn't provide additional functionality over the current
infrastructure.

v2: Add comment to anv_image::aux_usage (Jason Ekstrand)

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_blorp.c   |  2 +-
 src/intel/vulkan/anv_image.c   | 10 --
 src/intel/vulkan/anv_private.h | 15 ---
 src/intel/vulkan/gen8_cmd_buffer.c |  2 +-
 src/intel/vulkan/genX_cmd_buffer.c |  2 +-
 5 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 7e157b4d26..97ce236fc5 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -858,7 +858,7 @@ void anv_CmdClearDepthStencilImage(
struct blorp_surf depth, stencil;
if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
-   image->aux_usage, );
+   ISL_AUX_USAGE_NONE, );
} else {
   memset(, 0, sizeof(depth));
}
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index f262d8a524..d821629191 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -195,6 +195,7 @@ make_surface(const struct anv_device *dev,
  isl_surf_get_hiz_surf(>isl_dev, >depth_surface.isl,
>aux_surface.isl);
  add_surface(image, >aux_surface);
+ image->aux_usage = ISL_AUX_USAGE_HIZ;
   }
} else if (aspect == VK_IMAGE_ASPECT_COLOR_BIT && vk_info->samples == 1) {
   if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC)) {
@@ -523,6 +524,11 @@ anv_CreateImageView(VkDevice _device,
   iview->isl.usage = 0;
}
 
+   /* Sampling from HiZ is not yet enabled */
+   enum isl_aux_usage surf_usage = image->aux_usage;
+   if (surf_usage == ISL_AUX_USAGE_HIZ)
+  surf_usage = ISL_AUX_USAGE_NONE;
+
/* Input attachment surfaces for color or depth are allocated and filled
 * out at BeginRenderPass time because they need compression information.
 * Stencil image do not support compression so we just use the texture
@@ -540,7 +546,7 @@ anv_CreateImageView(VkDevice _device,
   .surf = >isl,
   .view = ,
   .aux_surf = >aux_surface.isl,
-  .aux_usage = image->aux_usage,
+  .aux_usage = surf_usage,
   .mocs = device->default_mocs);
 
   if (!device->info.has_llc)
@@ -564,7 +570,7 @@ anv_CreateImageView(VkDevice _device,
  .surf = >isl,
  .view = ,
  .aux_surf = >aux_surface.isl,
- .aux_usage = image->aux_usage,
+ .aux_usage = surf_usage,
  .mocs = device->default_mocs);
   } else {
  anv_fill_buffer_surface_state(device, iview->storage_surface_state,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index dbc8c3cf68..26f2986005 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1581,7 +1581,10 @@ struct anv_image {
   };
};
 
-   /** The aux usage for this surface when outside a render pass */
+   /**
+* The aux usage for this surface when outside a render pass
+* (and inside a render pass for HiZ-enabled depth stencil attachments).
+*/
enum isl_aux_usage aux_usage;
 
struct anv_surface aux_surface;
@@ -1642,16 +1645,6 @@ const struct anv_surface *
 anv_image_get_surface_for_aspect_mask(const struct anv_image *image,
   VkImageAspectFlags aspect_mask);
 
-static inline bool
-anv_image_has_hiz(const struct anv_image *image)
-{
-   /* We must check the aspect because anv_image::aux_surface may be used for
-* any type of auxiliary surface, not just HiZ.
-*/
-   return (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
-  image->aux_surface.isl.size > 0;
-}
-
 struct anv_buffer_view {
enum isl_format format; /**< VkBufferViewCreateInfo::format */
struct anv_bo *bo;
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c 
b/src/intel/vulkan/gen8_cmd_buffer.c
index 3e4aa9bc62..892a035304 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -337,7 +337,7 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer 
*cmd_buffer,
const struct anv_image_view *iview =
   anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
 
-   if (iview == NULL || !anv_image_has_hiz(iview->image))
+   if (iview == NULL || iview->image->aux_usage != ISL_AUX_USAGE_HIZ)
   return;
 
/* FINISHME: Implement multi-subpass HiZ */
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 7ff0d3ebba..a372e6420f 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ 

[Mesa-dev] [PATCH v2 07/21] anv: Use gen8 BLORP HiZ clearing functions

2017-01-12 Thread Nanley Chery
Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_blorp.c   | 54 +++---
 src/intel/vulkan/genX_cmd_buffer.c |  1 -
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 97ce236fc5..fdea2d0a23 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1163,6 +1163,8 @@ void
 anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
 {
const struct anv_cmd_state *cmd_state = _buffer->state;
+   const VkRect2D render_area = cmd_buffer->state.render_area;
+
 
if (!subpass_needs_clear(cmd_buffer))
   return;
@@ -1197,8 +1199,6 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
*cmd_buffer)
att_state->aux_usage, );
   surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
 
-  const VkRect2D render_area = cmd_buffer->state.render_area;
-
   if (att_state->fast_clear) {
  blorp_fast_clear(, , iview->isl.format,
   iview->isl.base_level,
@@ -1238,8 +1238,54 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
*cmd_buffer)
  .clearValue = cmd_state->attachments[ds].clear_value,
   };
 
-  clear_depth_stencil_attachment(cmd_buffer, ,
- _att, 1, _rect);
+
+  const uint8_t gen = cmd_buffer->device->info.gen;
+  bool clear_with_hiz = gen >= 8 && cmd_state->attachments[ds].aux_usage ==
+ISL_AUX_USAGE_HIZ;
+  const struct anv_image_view *iview = fb->attachments[ds];
+
+  if (clear_with_hiz) {
+ const bool clear_depth = clear_att.aspectMask &
+  VK_IMAGE_ASPECT_DEPTH_BIT;
+ const bool clear_stencil = clear_att.aspectMask &
+VK_IMAGE_ASPECT_STENCIL_BIT;
+
+ /* Check against restrictions for depth buffer clearing. A great GPU
+  * performance benefit isn't expected when using the HZ sequence for
+  * stencil-only clears. Therefore, we don't emit a HZ op sequence for
+  * a stencil clear in addition to using the BLORP-fallback for depth.
+  */
+ if (clear_depth) {
+if (!blorp_can_hiz_clear_depth(gen, iview->isl.format,
+   iview->image->samples,
+   render_area.offset.x,
+   render_area.offset.y,
+   render_area.offset.x +
+   render_area.extent.width,
+   render_area.offset.y +
+   render_area.extent.height)) {
+   clear_with_hiz = false;
+}
+ }
+
+ if (clear_with_hiz) {
+blorp_gen8_hiz_clear_attachments(, iview->image->samples,
+ render_area.offset.x,
+ render_area.offset.y,
+ render_area.offset.x +
+ render_area.extent.width,
+ render_area.offset.y +
+ render_area.extent.height,
+ clear_depth, clear_stencil,
+ clear_att.clearValue.
+depthStencil.stencil);
+ }
+  }
+
+  if (!clear_with_hiz) {
+ clear_depth_stencil_attachment(cmd_buffer, ,
+_att, 1, _rect);
+  }
 
   cmd_state->attachments[ds].pending_clear_aspects = 0;
}
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index b670d00e2d..63f6be12a8 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2250,7 +2250,6 @@ genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer 
*cmd_buffer,
 
cmd_buffer_emit_depth_stencil(cmd_buffer);
genX(cmd_buffer_emit_hz_op)(cmd_buffer, BLORP_HIZ_OP_HIZ_RESOLVE);
-   genX(cmd_buffer_emit_hz_op)(cmd_buffer, BLORP_HIZ_OP_DEPTH_CLEAR);
 
anv_cmd_buffer_clear_subpass(cmd_buffer);
 }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 06/21] intel/blorp_clear: Add gen8 HiZ clearing functions

2017-01-12 Thread Nanley Chery
Add an entry point for the optimized gen8 BLORP HiZ sequence. Commit
c9eaf12de20ac4143fe79d42018bdbb5a391356f fixed a bug that was
unknowingly worked around by forcing additional clear rectangle
alignment restrictions not specified in the PRMs. Now that the bug is no
longer present, omit the additional alignment restrictions.

v2: Adjust code comment about padding

Signed-off-by: Nanley Chery 
---
 src/intel/blorp/blorp.h   | 12 +++
 src/intel/blorp/blorp_clear.c | 81 +++
 2 files changed, 93 insertions(+)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 823475b607..ff60567fc4 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -155,8 +155,20 @@ blorp_clear_depth_stencil(struct blorp_batch *batch,
   uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
   bool clear_depth, float depth_value,
   uint8_t stencil_mask, uint8_t stencil_value);
+bool
+blorp_can_hiz_clear_depth(uint8_t gen, enum isl_format format,
+  uint32_t num_samples,
+  uint32_t x0, uint32_t y0,
+  uint32_t x1, uint32_t y1);
 
 void
+blorp_gen8_hiz_clear_attachments(struct blorp_batch *batch,
+ uint32_t num_samples,
+ uint32_t x0, uint32_t y0,
+ uint32_t x1, uint32_t y1,
+ bool clear_depth, bool clear_stencil,
+ uint8_t stencil_value);
+void
 blorp_clear_attachments(struct blorp_batch *batch,
 uint32_t binding_table_offset,
 enum isl_format depth_format,
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index d090408721..afc505d86c 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -470,6 +470,87 @@ blorp_clear_depth_stencil(struct blorp_batch *batch,
}
 }
 
+bool
+blorp_can_hiz_clear_depth(uint8_t gen, enum isl_format format,
+  uint32_t num_samples,
+  uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
+{
+   /* This function currently doesn't support any gen prior to gen8 */
+   assert(gen >= 8);
+
+   if (gen == 8 && format == ISL_FORMAT_R16_UNORM) {
+  /* Apply the D16 alignment restrictions. On BDW, HiZ has an 8x4 sample
+   * block with the following property: as the number of samples increases,
+   * the number of pixels representable by this block decreases by a factor
+   * of the sample dimensions. Sample dimensions scale following the MSAA
+   * interleaved pattern.
+   *
+   * Sample|Sample|Pixel
+   * Count |Dim   |Dim
+   * ===
+   *1  | 1x1  | 8x4
+   *2  | 2x1  | 4x4
+   *4  | 2x2  | 4x2
+   *8  | 4x2  | 2x2
+   *   16  | 4x4  | 2x1
+   *
+   * Table: Pixel Dimensions in a HiZ Sample Block Pre-SKL
+   */
+  const struct isl_extent2d sa_block_dim =
+ isl_get_interleaved_msaa_px_size_sa(num_samples);
+  const uint8_t align_px_w = 8 / sa_block_dim.w;
+  const uint8_t align_px_h = 4 / sa_block_dim.h;
+
+  /* Fast depth clears clear an entire sample block at a time. As a result,
+   * the rectangle must be aligned to the dimensions of the encompassing
+   * pixel block for a successful operation.
+   *
+   * Fast clears can still work if the upper-left corner is aligned and the
+   * bottom-right corner touches the edge of a depth buffer whose extent
+   * is unaligned. This is because each miplevel in the depth buffer is
+   * padded by the Pixel Dim (similar to a standard compressed texture).
+   * In this case, the clear rectangle could be padded to match the full
+   * depth buffer extent but to support multiple clearing techniques, we
+   * chose to be unaware of the depth buffer's extent and thus don't handle
+   * this case.
+   */
+  if (x0 % align_px_w || y0 % align_px_h ||
+  x1 % align_px_w || y1 % align_px_h)
+ return false;
+   }
+   return true;
+}
+
+/* Given a depth stencil attachment, this function performs a fast depth clear
+ * on a depth portion and a regular clear on the stencil portion. When
+ * performing a fast depth clear on the depth portion, the HiZ buffer is simply
+ * tagged as cleared so the depth clear value is not actually needed.
+ */
+void
+blorp_gen8_hiz_clear_attachments(struct blorp_batch *batch,
+ uint32_t num_samples,
+ uint32_t x0, uint32_t y0,
+ uint32_t x1, uint32_t y1,
+ bool clear_depth, bool clear_stencil,
+ uint8_t stencil_value)
+{
+   assert(batch->flags & 

Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Matt Turner
On Thu, Jan 12, 2017 at 4:56 PM, Ilia Mirkin  wrote:
> On Thu, Jan 12, 2017 at 7:46 PM, Matt Turner  wrote:
>> On Thu, Jan 12, 2017 at 3:20 PM, Ilia Mirkin  wrote:
>>> On Thu, Jan 12, 2017 at 6:04 PM, Nicolai Hähnle  wrote:
 On 12.01.2017 23:46, Ilia Mirkin wrote:
>
> On Thu, Jan 12, 2017 at 4:03 PM, Matteo Bruni 
> wrote:
>>
>> So, what would be really nice to have is a GLSL extension for some
>> kind of switch to select the requested behavior WRT NaN. For example a
>> three-way option with "don't generate NaN in arithmetic operations",
>> "do generate NaN" and "don't care". It could also be a GL state if
>> that's easier to implement with the existing hardware, since an
>> individual application isn't supposed to require different behavior
>> from one shader to the next.
>>
>> Is anyone interested in / favorable to something like this? It would
>> solve the issue with defining NaN behavior in GLSL while making things
>> a bit more compatible with "other API a lot of games are ported from
>> which happens to be supported by all the desktop GPUs".
>
>
> Not that I'm biased, but on the NVIDIA Tesla series (G80-GT21x), this
> enable is handled via a global flag, not in the shader binary, so this
> is all-or-nothing for a whole pipeline. On GF100+, I believe there is
> also an enable via a global flag, but there are also a FMUL.FMZ (and
> FFMA.FMZ) flag, which I *think* has the same effect. So for GF100+ hw,
> this could be done at the instruction level.


 Well, I would also have advocated for what is effectively a
 per-program/pipeline flag anyway, even though GCN hardware can 
 theoretically
 do it per-instruction. Tracking a per-instruction bit in the compiler
 quickly becomes fragile (e.g. there's no good way for us to model this
 information per-instruction in LLVM IR). Per-shader isn't any better than
 per-instruction due to linking, and per-shader-stage is awkward if we ever
 want to do fancier cross-stage optimizations.

 It's really quite simple. Introduce an extension with a name like
 MESA_shader_float_dx9. The behavior I'd suggest is:

 Enabling/requiring the extension in a shader causes various semantics
 changes to bring floating point behavior in line with DX9 in that shader's
 code:

 - 0*x = 0
>>>
>>> Yes. But only for fp32, not for fp64.
>>>
 - sqrt/rsqrt are guaranteed to take the absolute value of their argument
>>>
>>> Is that necessary? If the software knows about the ext, it also knows
>>> to stick the abs() in.
>>
>> Is there a compelling reason to make the extension offer just one of
>> these many behavior differences?
>>
>> FWIW, i965 has IEEE and "ALT" floating-point modes. ALT, I think
>> corresponds to d3d9 behavior, and its description says
>>
>> A floating-point execution mode that maps +/- inf to +/- fmax, +/-
>> denorm to +/-0, and NaN to +0 at the FPU inputs and never produces
>> infinities, denormals, or NaN values as outputs.
>
> Interesting. I believe on NVIDIA hardware, it's just float multiply
> that's affected.
>
>>
>> Also: Extended mathematics functions of log(), rsq() and sqrt() take
>> the absolute value of the sources before computation to avoid
>> generating INF and NaN results.
>>
>> If those two behaviors correspond to d3d9 behavior, I wouldn't want an
>> extension that offered only the "zero wins" behavior and expected
>> applications to insert abs().
>
> Really? That creates ARB_gpu_shader5-style extensions which do 75
> different things and that you can't expose if you can only do 74 of
> them. I think in the past we've avoided things like having "d3d9 mode"
> in gallium API's - it's nice for these things to be individually
> enumerated. I like the direction that e.g. ARB_clip_control went in -
> make it all configurable individually instead of bundling unrelated
> things together. This has allowed e.g. dolphin to do things in OpenGL
> that are impossible on DX. And whether 0 * x = 0 or not seems rather
> unrelated from whether rsq takes abs of its args.

Definitely agree. Sorry about i965 :)

I think we should figure out what behaviors D3D9 actually wants.
i965's ALT mode maps ±inf as ±fmax on input. If D3D9 wants that... we
should probably include it in the spec.

Also, if the extension is written in a way that isn't doable on i965 I
think we're just wasting time. i965 is the only driver that cannot use
st/nine. :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv/pipeline: Replace get_fs_input_map with get_last_vue_prog_data

2017-01-12 Thread Jason Ekstrand
This lets us delete a helper from genX_pipeline.c
---
 src/intel/vulkan/anv_pipeline.c  |  2 +-
 src/intel/vulkan/anv_private.h   | 10 +-
 src/intel/vulkan/genX_pipeline.c | 20 
 3 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 6c939b0..a86dd98 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -270,7 +270,7 @@ populate_wm_prog_key(const struct anv_pipeline *pipeline,
/* TODO: we could set this to 0 based on the information in nir_shader, but
 * this function is called before spirv_to_nir. */
const struct brw_vue_map *vue_map =
-  anv_pipeline_get_fs_input_map(pipeline);
+  &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
key->input_slots_valid = vue_map->slots_valid;
 
/* Vulkan doesn't specify a default */
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index dbc8c3c..ce2b591 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1482,15 +1482,15 @@ ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
 ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
 ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE)
 
-static inline const struct brw_vue_map *
-anv_pipeline_get_fs_input_map(const struct anv_pipeline *pipeline)
+static inline const struct brw_vue_prog_data *
+anv_pipeline_get_last_vue_prog_data(const struct anv_pipeline *pipeline)
 {
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
-  return &get_gs_prog_data(pipeline)->base.vue_map;
+  return &get_gs_prog_data(pipeline)->base;
else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
-  return &get_tes_prog_data(pipeline)->base.vue_map;
+  return &get_tes_prog_data(pipeline)->base;
else
-  return &get_vs_prog_data(pipeline)->base.vue_map;
+  return &get_vs_prog_data(pipeline)->base;
 }
 
 VkResult
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index b58aac4..7fa68c0 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -286,7 +286,6 @@ static void
 emit_3dstate_sbe(struct anv_pipeline *pipeline)
 {
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
-   const struct brw_vue_map *fs_input_map;
 
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), sbe);
@@ -296,7 +295,8 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline)
   return;
}
 
-   fs_input_map = anv_pipeline_get_fs_input_map(pipeline);
+   const struct brw_vue_map *fs_input_map =
+  &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
 
struct GENX(3DSTATE_SBE) sbe = {
   GENX(3DSTATE_SBE_header),
@@ -846,19 +846,6 @@ emit_cb_state(struct anv_pipeline *pipeline,
}
 }
 
-/**
- * Get the brw_vue_prog_data for the last stage which outputs VUEs.
- */
-static inline struct brw_vue_prog_data *
-get_last_vue_prog_data(struct anv_pipeline *pipeline)
-{
-   for (int s = MESA_SHADER_GEOMETRY; s >= 0; s--) {
-  if (pipeline->shaders[s])
- return (struct brw_vue_prog_data *) pipeline->shaders[s]->prog_data;
-   }
-   return NULL;
-}
-
 static void
 emit_3dstate_clip(struct anv_pipeline *pipeline,
   const VkPipelineViewportStateCreateInfo *vp_info,
@@ -886,7 +873,8 @@ emit_3dstate_clip(struct anv_pipeline *pipeline,
   clip.FrontWinding= vk_to_gen_front_face[rs_info->frontFace];
   clip.CullMode= vk_to_gen_cullmode[rs_info->cullMode];
   clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable;
-  const struct brw_vue_prog_data *last = get_last_vue_prog_data(pipeline);
+  const struct brw_vue_prog_data *last =
+ anv_pipeline_get_last_vue_prog_data(pipeline);
   if (last) {
  clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
  clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Ilia Mirkin
On Thu, Jan 12, 2017 at 7:46 PM, Matt Turner  wrote:
> On Thu, Jan 12, 2017 at 3:20 PM, Ilia Mirkin  wrote:
>> On Thu, Jan 12, 2017 at 6:04 PM, Nicolai Hähnle  wrote:
>>> On 12.01.2017 23:46, Ilia Mirkin wrote:

 On Thu, Jan 12, 2017 at 4:03 PM, Matteo Bruni 
 wrote:
>
> So, what would be really nice to have is a GLSL extension for some
> kind of switch to select the requested behavior WRT NaN. For example a
> three-way option with "don't generate NaN in arithmetic operations",
> "do generate NaN" and "don't care". It could also be a GL state if
> that's easier to implement with the existing hardware, since an
> individual application isn't supposed to require different behavior
> from one shader to the next.
>
> Is anyone interested in / favorable to something like this? It would
> solve the issue with defining NaN behavior in GLSL while making things
> a bit more compatible with "other API a lot of games are ported from
> which happens to be supported by all the desktop GPUs".


 Not that I'm biased, but on the NVIDIA Tesla series (G80-GT21x), this
 enable is handled via a global flag, not in the shader binary, so this
 is all-or-nothing for a whole pipeline. On GF100+, I believe there is
 also an enable via a global flag, but there are also a FMUL.FMZ (and
 FFMA.FMZ) flag, which I *think* has the same effect. So for GF100+ hw,
 this could be done at the instruction level.
>>>
>>>
>>> Well, I would also have advocated for what is effectively a
>>> per-program/pipeline flag anyway, even though GCN hardware can theoretically
>>> do it per-instruction. Tracking a per-instruction bit in the compiler
>>> quickly becomes fragile (e.g. there's no good way for us to model this
>>> information per-instruction in LLVM IR). Per-shader isn't any better than
>>> per-instruction due to linking, and per-shader-stage is awkward if we ever
>>> want to do fancier cross-stage optimizations.
>>>
>>> It's really quite simple. Introduce an extension with a name like
>>> MESA_shader_float_dx9. The behavior I'd suggest is:
>>>
>>> Enabling/requiring the extension in a shader causes various semantics
>>> changes to bring floating point behavior in line with DX9 in that shader's
>>> code:
>>>
>>> - 0*x = 0
>>
>> Yes. But only for fp32, not for fp64.
>>
>>> - sqrt/rsqrt are guaranteed to take the absolute value of their argument
>>
>> Is that necessary? If the software knows about the ext, it also knows
>> to stick the abs() in.
>
> Is there a compelling reason to make the extension offer just one of
> these many behavior differences?
>
> FWIW, i965 has IEEE and "ALT" floating-point modes. ALT, I think
> corresponds to d3d9 behavior, and its description says
>
> A floating-point execution mode that maps +/- inf to +/- fmax, +/-
> denorm to +/-0, and NaN to +0 at the FPU inputs and never produces
> infinities, denormals, or NaN values as outputs.

Interesting. I believe on NVIDIA hardware, it's just float multiply
that's affected.

>
> Also: Extended mathematics functions of log(), rsq() and sqrt() take
> the absolute value of the sources before computation to avoid
> generating INF and NaN results.
>
> If those two behaviors correspond to d3d9 behavior, I wouldn't want an
> extension that offered only the "zero wins" behavior and expected
> applications to insert abs().

Really? That creates ARB_gpu_shader5-style extensions which do 75
different things and that you can't expose if you can only do 74 of
them. I think in the past we've avoided things like having "d3d9 mode"
in gallium API's - it's nice for these things to be individually
enumerated. I like the direction that e.g. ARB_clip_control went in -
make it all configurable individually instead of bundling unrelated
things together. This has allowed e.g. dolphin to do things in OpenGL
that are impossible on DX. And whether 0 * x = 0 or not seems rather
unrelated from whether rsq takes abs of its args.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/22] i965/fs: fix lower SIMD width for IVB/VLV's MOVE_INDIRECT

2017-01-12 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> From: "Juan A. Suarez Romero" 
>
> Previous to Broadwell, we have 8 registers for MOV_INDIRECT. But if
> IVB/VLV deal with DFs, we will duplicate the exec_size from 8 to 16.
>
> This patch limits the SIMD width to 4 in this case.
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index cfce364..45d320d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -4959,8 +4959,13 @@ get_lowered_simd_width(const struct gen_device_info 
> *devinfo,
>return MIN2(8, inst->exec_size);
>  
> case SHADER_OPCODE_MOV_INDIRECT:
> -  /* Prior to Broadwell, we only have 8 address subregisters */
> -  return MIN3(devinfo->gen >= 8 ? 16 : 8,
> +  /* Prior to Broadwell, we only have 8 address subregisters. Special 
> case
> +   * for IVB/VLV and DF types: set to 4 (exec_size will be later
> +   * duplicated).

The comment seems rather misleading, exec size doubling is unlikely to
have anything to do with this problem.

> +   */
> +  return MIN3(devinfo->gen >= 8 ? 16 : ((devinfo->gen == 7 &&
> + !devinfo->is_haswell &&
> + inst->exec_data_size() == 8) ? 
> 4 : 8),
>2 * REG_SIZE / (inst->dst.stride * 
> type_sz(inst->dst.type)),
>inst->exec_size);

I'm amazed that this works at all on HSW, according to the IVB and HSW
PRMs:

"2.When the destination requires two registers and the sources are
 indirect, the sources must use 1x1 regioning mode. In addition, the
 sources must be assembled from GRF registers each accessed by adjacent
 index registers in 1x1 regioning modes."

So for DF instructions the execution size is not limited by the number
of address registers you have available, but by the EU decompression
logic not handling VxH indirect addressing correctly.

I think this should be something along the lines of:

|   const unsigned max_size = (devinfo->gen >= 8 ? 2 : 1) * REG_SIZE;
|   return MIN3(devinfo->gen >= 8 ? 16 : 8,
|   max_size / (inst->dst.stride * type_sz(inst->dst.type)),
|   inst->exec_size);

>  
> -- 
> 2.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Matt Turner
On Thu, Jan 12, 2017 at 3:20 PM, Ilia Mirkin  wrote:
> On Thu, Jan 12, 2017 at 6:04 PM, Nicolai Hähnle  wrote:
>> On 12.01.2017 23:46, Ilia Mirkin wrote:
>>>
>>> On Thu, Jan 12, 2017 at 4:03 PM, Matteo Bruni 
>>> wrote:

 So, what would be really nice to have is a GLSL extension for some
 kind of switch to select the requested behavior WRT NaN. For example a
 three-way option with "don't generate NaN in arithmetic operations",
 "do generate NaN" and "don't care". It could also be a GL state if
 that's easier to implement with the existing hardware, since an
 individual application isn't supposed to require different behavior
 from one shader to the next.

 Is anyone interested in / favorable to something like this? It would
 solve the issue with defining NaN behavior in GLSL while making things
 a bit more compatible with "other API a lot of games are ported from
 which happens to be supported by all the desktop GPUs".
>>>
>>>
>>> Not that I'm biased, but on the NVIDIA Tesla series (G80-GT21x), this
>>> enable is handled via a global flag, not in the shader binary, so this
>>> is all-or-nothing for a whole pipeline. On GF100+, I believe there is
>>> also an enable via a global flag, but there are also a FMUL.FMZ (and
>>> FFMA.FMZ) flag, which I *think* has the same effect. So for GF100+ hw,
>>> this could be done at the instruction level.
>>
>>
>> Well, I would also have advocated for what is effectively a
>> per-program/pipeline flag anyway, even though GCN hardware can theoretically
>> do it per-instruction. Tracking a per-instruction bit in the compiler
>> quickly becomes fragile (e.g. there's no good way for us to model this
>> information per-instruction in LLVM IR). Per-shader isn't any better than
>> per-instruction due to linking, and per-shader-stage is awkward if we ever
>> want to do fancier cross-stage optimizations.
>>
>> It's really quite simple. Introduce an extension with a name like
>> MESA_shader_float_dx9. The behavior I'd suggest is:
>>
>> Enabling/requiring the extension in a shader causes various semantics
>> changes to bring floating point behavior in line with DX9 in that shader's
>> code:
>>
>> - 0*x = 0
>
> Yes. But only for fp32, not for fp64.
>
>> - sqrt/rsqrt are guaranteed to take the absolute value of their argument
>
> Is that necessary? If the software knows about the ext, it also knows
> to stick the abs() in.

Is there a compelling reason to make the extension offer just one of
these many behavior differences?

FWIW, i965 has IEEE and "ALT" floating-point modes. ALT, I think
corresponds to d3d9 behavior, and its description says

A floating-point execution mode that maps +/- inf to +/- fmax, +/-
denorm to +/-0, and NaN to +0 at the FPU inputs and never produces
infinities, denormals, or NaN values as outputs.

Also: Extended mathematics functions of log(), rsq() and sqrt() take
the absolute value of the sources before computation to avoid
generating INF and NaN results.

If those two behaviors correspond to d3d9 behavior, I wouldn't want an
extension that offered only the "zero wins" behavior and expected
applications to insert abs().

Other things to note: ALT mode is not supported with half or double
precision, and
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 94512] X segfaults with glx-tls enabled in a x32 environment

2017-01-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=94512

Matt Turner  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #14 from Matt Turner  ---
Fixed by

commit 89458366585c34879b70110758bb4fd3acb62ce0
Author: Grazvydas Ignotas 
Date:   Sun Jan 8 19:38:09 2017 +0200

mapi: update the asm code to support x32

Thanks Grazvydas!

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Marek Olšák
On Fri, Jan 13, 2017 at 12:43 AM, Nicolai Hähnle  wrote:
> On 13.01.2017 00:20, Ilia Mirkin wrote:
>>
>> On Thu, Jan 12, 2017 at 6:04 PM, Nicolai Hähnle 
>> wrote:
>>>
>>> On 12.01.2017 23:46, Ilia Mirkin wrote:


 On Thu, Jan 12, 2017 at 4:03 PM, Matteo Bruni 
 wrote:
>
>
> So, what would be really nice to have is a GLSL extension for some
> kind of switch to select the requested behavior WRT NaN. For example a
> three-way option with "don't generate NaN in arithmetic operations",
> "do generate NaN" and "don't care". It could also be a GL state if
> that's easier to implement with the existing hardware, since an
> individual application isn't supposed to require different behavior
> from one shader to the next.
>
> Is anyone interested in / favorable to something like this? It would
> solve the issue with defining NaN behavior in GLSL while making things
> a bit more compatible with "other API a lot of games are ported from
> which happens to be supported by all the desktop GPUs".



 Not that I'm biased, but on the NVIDIA Tesla series (G80-GT21x), this
 enable is handled via a global flag, not in the shader binary, so this
 is all-or-nothing for a whole pipeline. On GF100+, I believe there is
 also an enable via a global flag, but there are also a FMUL.FMZ (and
 FFMA.FMZ) flag, which I *think* has the same effect. So for GF100+ hw,
 this could be done at the instruction level.
>>>
>>>
>>>
>>> Well, I would also have advocated for what is effectively a
>>> per-program/pipeline flag anyway, even though GCN hardware can
>>> theoretically
>>> do it per-instruction. Tracking a per-instruction bit in the compiler
>>> quickly becomes fragile (e.g. there's no good way for us to model this
>>> information per-instruction in LLVM IR). Per-shader isn't any better than
>>> per-instruction due to linking, and per-shader-stage is awkward if we
>>> ever
>>> want to do fancier cross-stage optimizations.
>>>
>>> It's really quite simple. Introduce an extension with a name like
>>> MESA_shader_float_dx9. The behavior I'd suggest is:
>>>
>>> Enabling/requiring the extension in a shader causes various semantics
>>> changes to bring floating point behavior in line with DX9 in that
>>> shader's
>>> code:
>>>
>>> - 0*x = 0
>>
>>
>> Yes. But only for fp32, not for fp64.
>>
>>> - sqrt/rsqrt are guaranteed to take the absolute value of their argument
>>
>>
>> Is that necessary? If the software knows about the ext, it also knows
>> to stick the abs() in.
>>
>>> - anything else?
>>
>>
>> I'd say MESA_shader_float_zero_wins or something, if we don't stick
>> the sqrt stuff into it.
>
>
> Well, I don't know the intricacies of DX9. I agree that apps can do the
> abs() themselves, so if the 0*x behavior is really the only other
> difference, then zero_wins is a fine name as well.
>
> Cheers,
> Nicolai
>
>
>>
>> Here is a software model of the Tesla-era shader execution created by
>> Marcin (mwk):
>>
>> https://github.com/envytools/envytools/blob/master/nvhw/fp.c#L168
>>
>> The bit in question is "zero_wins", so just look at what that
>> modifies. So if you have a*b and a || b == 0 (i.e. they are +0 or -0)
>> then +0 is returned.
>>
>>>
>>> It is a link error to link a program in which some shaders have the
>>> extension and others don't.
>>>
>>> There's funny interactions like having compute shaders with dx9 float
>>> semantics, but that's pretty much it :)
>>>
>>> Somebody just needs to write up a draft, but it only makes sense if we
>>> can
>>> at least get all the Mesa drivers and Wine behind this. If there's a
>>> simple
>>> global flag on NVidia hardware, then it should be easy to provide an
>>> initial
>>> implementation for nouveau *hint* *hint* ;)
>>>
>>> For radeonsi, more work is required (like support in LLVM).
>>
>>
>> All sounds good to me. Should be relatively straightforward for nouveau.

Wine can also do what Nine does. Its RSQ implementation is:
min(FLT_MAX, rsq(abs(x)))

That min() expression gets rid of +inf and also NaNs, because a
non-NaN number wins (here FLT_MAX) on radeonsi.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mapi: update the asm code to support x32

2017-01-12 Thread Nicolai Hähnle

On 13.01.2017 00:47, Grazvydas Ignotas wrote:

As the branchpoint is near, can someone push it, please?


Done, thanks! (This time for real ;-))



Gražvydas

On Sun, Jan 8, 2017 at 11:22 PM, Matt Turner  wrote:

On Sun, Jan 8, 2017 at 9:38 AM, Grazvydas Ignotas  wrote:

Fixes crashes when both glx-tls and asm are enabled on x32.

Cc: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94512
Signed-off-by: Grazvydas Ignotas 


Thanks, this is great. Whoever commits this (maybe me :) should also put

Bugzilla: https://bugs.gentoo.org/show_bug.cgi?id=575458

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/debug: move .gitignore for sid_tables.h too

2017-01-12 Thread Nicolai Hähnle

On 13.01.2017 00:49, Grazvydas Ignotas wrote:

On Thu, Jan 12, 2017 at 12:26 PM, Nicolai Hähnle  wrote:

On 12.01.2017 00:58, Grazvydas Ignotas wrote:


b838f642 "ac/debug: Move sid_tables.h generation to common code." moved
sid_tables.h but forgot the corresponding .gitignore.

Signed-off-by: Grazvydas Ignotas 
---
no commit access



Pushed, thanks!


Hmm, somehow I don't see it in master.


I missed a push error due to somebody getting between my pushes, but it 
should be there as of half an hour ago or so.


Nicolai






 src/amd/common/.gitignore   | 1 +
 src/gallium/drivers/radeonsi/.gitignore | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 src/amd/common/.gitignore
 delete mode 100644 src/gallium/drivers/radeonsi/.gitignore

diff --git a/src/amd/common/.gitignore b/src/amd/common/.gitignore
new file mode 100644
index 000..e0ee798
--- /dev/null
+++ b/src/amd/common/.gitignore
@@ -0,0 +1 @@
+sid_tables.h
diff --git a/src/gallium/drivers/radeonsi/.gitignore
b/src/gallium/drivers/radeonsi/.gitignore
deleted file mode 100644
index e0ee798..000
--- a/src/gallium/drivers/radeonsi/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-sid_tables.h




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 22/22] anv: Avoid some resolves for samplable HiZ buffers

2017-01-12 Thread Jason Ekstrand
On Wed, Jan 11, 2017 at 5:55 PM, Nanley Chery  wrote:

> Signed-off-by: Nanley Chery 
> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 54 +-
> 
>  1 file changed, 41 insertions(+), 13 deletions(-)
>
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 447baa08b2..11745f8b9e 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -311,11 +311,21 @@ need_input_attachment_state(const struct
> anv_render_pass_attachment *att)
>  }
>
>  static enum isl_aux_usage
> -layout_to_hiz_usage(VkImageLayout layout)
> +layout_to_hiz_usage(VkImageLayout layout, uint8_t samples)
>  {
> switch (layout) {
> case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
>return ISL_AUX_USAGE_HIZ;
> +   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
> +   case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
> +  if (anv_can_sample_with_hiz(GEN_GEN, samples))
> + return ISL_AUX_USAGE_HIZ;
> +  /* Fall-through */
> +   case VK_IMAGE_LAYOUT_GENERAL:
> +  /* This buffer could be used as a source or destination in a
> transfer
> +   * operation. Transfer operations currently don't perform HiZ-enabled
> reads
> +   * and writes.
> +   */
> default:
>return ISL_AUX_USAGE_NONE;
> }
> @@ -336,26 +346,43 @@ transition_depth_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> if (image->aux_usage != ISL_AUX_USAGE_HIZ)
>return;
>
> -   const bool hiz_enabled = layout_to_hiz_usage(initial_layout) ==
> +   const bool hiz_enabled = layout_to_hiz_usage(initial_layout,
> image->samples) ==
>  ISL_AUX_USAGE_HIZ;
> -   const bool enable_hiz = layout_to_hiz_usage(final_layout) ==
> +   const bool enable_hiz = layout_to_hiz_usage(final_layout,
> image->samples) ==
> ISL_AUX_USAGE_HIZ;
>
> +   /* Images that have sampling with HiZ enabled cause all shader
> sampling to
> +* load data with the HiZ buffer. Therefore, in the case of
> transitioning to
> +* the general layout - which currently routes all writes to the depth
> +* buffer - we must ensure that the HiZ buffer remains consistent with
> the
> +* depth buffer by performing a HIZ resolve after performing the
> resolve
> +* required by this transition (if not already HiZ).
> +*/
> +   const bool needs_hiz_resolve = final_layout == VK_IMAGE_LAYOUT_GENERAL
> &&
> +  (hiz_enabled || initial_layout == VK_IMAGE_LAYOUT_UNDEFINED) &&
> +  anv_can_sample_with_hiz(GEN_GEN, image->samples);
> +
> /* We've already initialized the aux HiZ buffer at BindImageMemory
> time,
>  * so there's no need to perform a HIZ resolve or clear to avoid GPU
> hangs.
>  * This initial layout indicates that the user doesn't care about the
> data
> -* that's currently in the buffer, so no resolves are necessary.
> +* that's currently in the buffer, so resolves are not necessary
> except for
> +* the case mentioned above.
>  */
> -   if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED)
> +   if (!needs_hiz_resolve && initial_layout == VK_IMAGE_LAYOUT_UNDEFINED)
>return;
>
> -   if (hiz_enabled == enable_hiz) {
> -  /* The same buffer will be used, no resolves are necessary */
> -   } else if (hiz_enabled && !enable_hiz) {
> -  anv_gen8_hiz_op_resolve(cmd_buffer, image,
> BLORP_HIZ_OP_DEPTH_RESOLVE);
> +   if (!hiz_enabled && enable_hiz) {
> + anv_gen8_hiz_op_resolve(cmd_buffer, image,
> BLORP_HIZ_OP_HIZ_RESOLVE);
> } else {
> -  assert(!hiz_enabled && enable_hiz);
> -  anv_gen8_hiz_op_resolve(cmd_buffer, image,
> BLORP_HIZ_OP_HIZ_RESOLVE);
> +  if (hiz_enabled == enable_hiz) {
> + /* If the same buffer will be used, no resolves are necessary
> except
> +  * for the special case noted above.
> +  */
> +  } else if (hiz_enabled && !enable_hiz) {
> + anv_gen8_hiz_op_resolve(cmd_buffer, image,
> BLORP_HIZ_OP_DEPTH_RESOLVE);
> +  }
> +  if (needs_hiz_resolve)
>
+ anv_gen8_hiz_op_resolve(cmd_buffer, image,
> BLORP_HIZ_OP_HIZ_RESOLVE);
>

I think this function would be way easier to read if it was structured a
bit differently.  How about

enum blorp_hiz_op hiz_op;
if (initial_layout == UNDEFINED) {
   /* comment */
   hiz_op = BLORP_HIZ_OP_NONE;
} else if (hiz_enabled && !enable_hiz) {
   hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
} else if (!hiz_enabled && enable_hiz) {
   hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
} else {
   hiz_op = BLORP_HIZ_OP_NONE;
}

if (hiz_op != BLORP_HIZ_OP_NONE)
   anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);

/* comment */
if (final_layout == GENERAL && can_sample_from_hiz && hiz_op !=
BLORP_HIZ_OP_HIZ_RESOLVE)
   anv_gen8_hiz_op_resolve(cmd_buffer, image, BLORP_HIZ_OP_HIZ_RESOLVE);

I *think* that accomplishes the same thing and it makes way more sense in
my brain than all of the 

Re: [Mesa-dev] [PATCH] ac/debug: move .gitignore for sid_tables.h too

2017-01-12 Thread Grazvydas Ignotas
On Thu, Jan 12, 2017 at 12:26 PM, Nicolai Hähnle  wrote:
> On 12.01.2017 00:58, Grazvydas Ignotas wrote:
>>
>> b838f642 "ac/debug: Move sid_tables.h generation to common code." moved
>> sid_tables.h but forgot the corresponding .gitignore.
>>
>> Signed-off-by: Grazvydas Ignotas 
>> ---
>> no commit access
>
>
> Pushed, thanks!

Hmm, somehow I don't see it in master.

>
>
>>
>>  src/amd/common/.gitignore   | 1 +
>>  src/gallium/drivers/radeonsi/.gitignore | 1 -
>>  2 files changed, 1 insertion(+), 1 deletion(-)
>>  create mode 100644 src/amd/common/.gitignore
>>  delete mode 100644 src/gallium/drivers/radeonsi/.gitignore
>>
>> diff --git a/src/amd/common/.gitignore b/src/amd/common/.gitignore
>> new file mode 100644
>> index 000..e0ee798
>> --- /dev/null
>> +++ b/src/amd/common/.gitignore
>> @@ -0,0 +1 @@
>> +sid_tables.h
>> diff --git a/src/gallium/drivers/radeonsi/.gitignore
>> b/src/gallium/drivers/radeonsi/.gitignore
>> deleted file mode 100644
>> index e0ee798..000
>> --- a/src/gallium/drivers/radeonsi/.gitignore
>> +++ /dev/null
>> @@ -1 +0,0 @@
>> -sid_tables.h
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mapi: update the asm code to support x32

2017-01-12 Thread Grazvydas Ignotas
As the branchpoint is near, can someone push it, please?

Gražvydas

On Sun, Jan 8, 2017 at 11:22 PM, Matt Turner  wrote:
> On Sun, Jan 8, 2017 at 9:38 AM, Grazvydas Ignotas  wrote:
>> Fixes crashes when both glx-tls and asm are enabled on x32.
>>
>> Cc: mesa-sta...@lists.freedesktop.org
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94512
>> Signed-off-by: Grazvydas Ignotas 
>
> Thanks, this is great. Who ever commits this (maybe me :) should also put
>
> Bugzilla: https://bugs.gentoo.org/show_bug.cgi?id=575458
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Nicolai Hähnle

On 13.01.2017 00:20, Ilia Mirkin wrote:

On Thu, Jan 12, 2017 at 6:04 PM, Nicolai Hähnle  wrote:

On 12.01.2017 23:46, Ilia Mirkin wrote:


On Thu, Jan 12, 2017 at 4:03 PM, Matteo Bruni 
wrote:


So, what would be really nice to have is a GLSL extension for some
kind of switch to select the requested behavior WRT NaN. For example a
three-way option with "don't generate NaN in arithmetic operations",
"do generate NaN" and "don't care". It could also be a GL state if
that's easier to implement with the existing hardware, since an
individual application isn't supposed to require different behavior
from one shader to the next.

Is anyone interested in / favorable to something like this? It would
solve the issue with defining NaN behavior in GLSL while making things
a bit more compatible with "other API a lot of games are ported from
which happens to be supported by all the desktop GPUs".



Not that I'm biased, but on the NVIDIA Tesla series (G80-GT21x), this
enable is handled via a global flag, not in the shader binary, so this
is all-or-nothing for a whole pipeline. On GF100+, I believe there is
also an enable via a global flag, but there is also an FMUL.FMZ (and
FFMA.FMZ) flag, which I *think* has the same effect. So for GF100+ hw,
this could be done at the instruction level.



Well, I would also have advocated for what is effectively a
per-program/pipeline flag anyway, even though GCN hardware can theoretically
do it per-instruction. Tracking a per-instruction bit in the compiler
quickly becomes fragile (e.g. there's no good way for us to model this
information per-instruction in LLVM IR). Per-shader isn't any better than
per-instruction due to linking, and per-shader-stage is awkward if we ever
want to do fancier cross-stage optimizations.

It's really quite simple. Introduce an extension with a name like
MESA_shader_float_dx9. The behavior I'd suggest is:

Enabling/requiring the extension in a shader causes various semantics
changes to bring floating point behavior in line with DX9 in that shader's
code:

- 0*x = 0


Yes. But only for fp32, not for fp64.


- sqrt/rsqrt are guaranteed to take the absolute value of their argument


Is that necessary? If the software knows about the ext, it also knows
to stick the abs() in.


- anything else?


I'd say MESA_shader_float_zero_wins or something, if we don't stick
the sqrt stuff into it.


Well, I don't know the intricacies of DX9. I agree that apps can do the 
abs() themselves, so if the 0*x behavior is really the only other 
difference, then zero_wins is a fine name as well.


Cheers,
Nicolai



Here is a software model of the Tesla-era shader execution created by
Marcin (mwk):

https://github.com/envytools/envytools/blob/master/nvhw/fp.c#L168

The bit in question is "zero_wins", so just look at what that
modifies. So if you have a*b and a || b == 0 (i.e. they are +0 or -0)
then +0 is returned.



It is a link error to link a program in which some shaders have the
extension and others don't.

There's funny interactions like having compute shaders with dx9 float
semantics, but that's pretty much it :)

Somebody just needs to write up a draft, but it only makes sense if we can
at least get all the Mesa drivers and Wine behind this. If there's a simple
global flag on NVidia hardware, then it should be easy to provide an initial
implementation for nouveau *hint* *hint* ;)

For radeonsi, more work is required (like support in LLVM).


All sounds good to me. Should be relatively straightforward for nouveau.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 6/9] radeonsi: remove unused si_prepare_cube_coords

2017-01-12 Thread Nicolai Hähnle

On 12.01.2017 21:42, Marek Olšák wrote:

I commented on patches 1 & 2. Other than that, patches 1-6 are:

Reviewed-by: Marek Olšák 


Thanks. I'm especially curious in hindsight how the 1.5 got dropped 
again. Anyway, I used git rebase -x to double-check that each step 
compiles before pushing.


Nicolai



Marek

On Thu, Jan 12, 2017 at 4:39 PM, Nicolai Hähnle  wrote:

From: Nicolai Hähnle 

Reviewed-by: Bas Nieuwenhuizen 
---
 src/gallium/drivers/radeonsi/si_shader_internal.h |   4 -
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 196 --
 2 files changed, 200 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 6b3ac17..51d491e 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -208,16 +208,12 @@ LLVMValueRef si_llvm_emit_fetch(struct 
lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle);

 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
const struct tgsi_opcode_info *info,
LLVMValueRef dst[4]);

 void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base);
-void si_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
-   struct lp_build_emit_data *emit_data,
-   LLVMValueRef *coords_arg,
-   LLVMValueRef *derivs_arg);

 #endif
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index 11bd45d..085fd5b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -899,206 +899,10 @@ void si_shader_context_init_alu(struct 
lp_build_tgsi_context *bld_base)
bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;

bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
 }
-
-/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
- * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
- * already multiplied by two. id is the cube face number.
- */
-struct cube_selection_coords {
-   LLVMValueRef stc[2];
-   LLVMValueRef ma;
-   LLVMValueRef id;
-};
-
-static void build_cube_intrinsic(struct gallivm_state *gallivm,
-LLVMValueRef in[3],
-struct cube_selection_coords *out)
-{
-   LLVMBuilderRef builder = gallivm->builder;
-
-   if (HAVE_LLVM >= 0x0309) {
-   LLVMTypeRef f32 = LLVMTypeOf(in[0]);
-
-   out->stc[1] = lp_build_intrinsic(builder, "llvm.amdgcn.cubetc",
-   f32, in, 3, LP_FUNC_ATTR_READNONE);
-   out->stc[0] = lp_build_intrinsic(builder, "llvm.amdgcn.cubesc",
-   f32, in, 3, LP_FUNC_ATTR_READNONE);
-   out->ma = lp_build_intrinsic(builder, "llvm.amdgcn.cubema",
-   f32, in, 3, LP_FUNC_ATTR_READNONE);
-   out->id = lp_build_intrinsic(builder, "llvm.amdgcn.cubeid",
-   f32, in, 3, LP_FUNC_ATTR_READNONE);
-   } else {
-   LLVMValueRef c[4] = {
-   in[0],
-   in[1],
-   in[2],
-   LLVMGetUndef(LLVMTypeOf(in[0]))
-   };
-   LLVMValueRef vec = lp_build_gather_values(gallivm, c, 4);
-
-   LLVMValueRef tmp =
-   lp_build_intrinsic(builder, "llvm.AMDGPU.cube",
- LLVMTypeOf(vec), &vec, 1,
- LP_FUNC_ATTR_READNONE);
-
-   out->stc[1] = LLVMBuildExtractElement(builder, tmp,
-   lp_build_const_int32(gallivm, 0), "");
-   out->stc[0] = LLVMBuildExtractElement(builder, tmp,
-   lp_build_const_int32(gallivm, 1), "");
-   out->ma = LLVMBuildExtractElement(builder, tmp,
-   lp_build_const_int32(gallivm, 2), "");
-   out->id = 

Re: [Mesa-dev] [PATCH 12/22] i965/vec4: split DF instructions and later duplicate its execsize in IVB/VLV

2017-01-12 Thread Matt Turner
On Thu, Jan 5, 2017 at 5:07 AM, Samuel Iglesias Gonsálvez
 wrote:
> We need to split DF instructions in two on IVB/VLV as it needs an
> execsize 8 to process 4 DF values (one GRF in total).
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>  src/mesa/drivers/dri/i965/brw_ir_vec4.h  |  1 +
>  src/mesa/drivers/dri/i965/brw_vec4.cpp   | 20 +++-
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 +--
>  3 files changed, 33 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> index 57fc6be..9291be0 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> @@ -291,6 +291,7 @@ public:
> bool can_do_writemask(const struct gen_device_info *devinfo);
> bool can_change_types() const;
> bool has_source_and_destination_hazard() const;
> +   unsigned exec_data_size() const;
>
> bool is_align1_partial_write()
> {
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index b5e846d..6ba85d7 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -301,6 +301,19 @@ vec4_instruction::can_change_types() const
>  !src[1].abs && !src[1].negate));
>  }
>
> +unsigned
> +vec4_instruction::exec_data_size() const
> +{
> +  unsigned exec_data_size = 0;
> +
> +  for (int i = 0; i < 3; i++) {
> +if (this->src[i].type != BAD_FILE)
> +  exec_data_size = MAX2(exec_data_size, type_sz(this->src[i].type));

Indentation, use braces.

> +  }
> +
> +  return exec_data_size;
> +}

I think this should become a static inline function for the reasons
Curro mentioned in the fs patch.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/22] i965/fs: fix lower SIMD width for IVB/VLV's MOVE_INDIRECT

2017-01-12 Thread Matt Turner
On Thu, Jan 5, 2017 at 5:07 AM, Samuel Iglesias Gonsálvez
 wrote:
> From: "Juan A. Suarez Romero" 
>

s/MOVE_INDIRECT/MOV_INDIRECT/ in the subject

> Previous to Broadwell, we have 8 registers for MOV_INDIRECT. But if
> IVB/VLV deal with DFs, we will duplicate the exec_size from 8 to 16.
>
> This patch limits the SIMD width to 4 in this case.
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index cfce364..45d320d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -4959,8 +4959,13 @@ get_lowered_simd_width(const struct gen_device_info 
> *devinfo,
>return MIN2(8, inst->exec_size);
>
> case SHADER_OPCODE_MOV_INDIRECT:
> -  /* Prior to Broadwell, we only have 8 address subregisters */
> -  return MIN3(devinfo->gen >= 8 ? 16 : 8,
> +  /* Prior to Broadwell, we only have 8 address subregisters. Special 
> case
> +   * for IVB/VLV and DF types: set to 4 (exec_size will be later
> +   * duplicated).
> +   */
> +  return MIN3(devinfo->gen >= 8 ? 16 : ((devinfo->gen == 7 &&
> + !devinfo->is_haswell &&
> + inst->exec_data_size() == 8) ? 
> 4 : 8),

This is getting complicated. Let's write this as

unsigned num_address_regs;
if (devinfo->gen >= 8) {
   num_address_regs = 16;
} else if (devinfo->is_haswell || get_exec_type_size(inst) != 8) {
   num_address_regs = 8;
} else {
   num_address_regs = 4;
}

return MIN3(num_address_regs, ...)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Ilia Mirkin
On Thu, Jan 12, 2017 at 6:04 PM, Nicolai Hähnle  wrote:
> On 12.01.2017 23:46, Ilia Mirkin wrote:
>>
>> On Thu, Jan 12, 2017 at 4:03 PM, Matteo Bruni 
>> wrote:
>>>
>>> So, what would be really nice to have is a GLSL extension for some
>>> kind of switch to select the requested behavior WRT NaN. For example a
>>> three-way option with "don't generate NaN in arithmetic operations",
>>> "do generate NaN" and "don't care". It could also be a GL state if
>>> that's easier to implement with the existing hardware, since an
>>> individual application isn't supposed to require different behavior
>>> from one shader to the next.
>>>
>>> Is anyone interested in / favorable to something like this? It would
>>> solve the issue with defining NaN behavior in GLSL while making things
>>> a bit more compatible with "other API a lot of games are ported from
>>> which happens to be supported by all the desktop GPUs".
>>
>>
>> Not that I'm biased, but on the NVIDIA Tesla series (G80-GT21x), this
>> enable is handled via a global flag, not in the shader binary, so this
>> is all-or-nothing for a whole pipeline. On GF100+, I believe there is
>> also an enable via a global flag, but there is also an FMUL.FMZ (and
>> FFMA.FMZ) flag, which I *think* has the same effect. So for GF100+ hw,
>> this could be done at the instruction level.
>
>
> Well, I would also have advocated for what is effectively a
> per-program/pipeline flag anyway, even though GCN hardware can theoretically
> do it per-instruction. Tracking a per-instruction bit in the compiler
> quickly becomes fragile (e.g. there's no good way for us to model this
> information per-instruction in LLVM IR). Per-shader isn't any better than
> per-instruction due to linking, and per-shader-stage is awkward if we ever
> want to do fancier cross-stage optimizations.
>
> It's really quite simple. Introduce an extension with a name like
> MESA_shader_float_dx9. The behavior I'd suggest is:
>
> Enabling/requiring the extension in a shader causes various semantics
> changes to bring floating point behavior in line with DX9 in that shader's
> code:
>
> - 0*x = 0

Yes. But only for fp32, not for fp64.

> - sqrt/rsqrt are guaranteed to take the absolute value of their argument

Is that necessary? If the software knows about the ext, it also knows
to stick the abs() in.

> - anything else?

I'd say MESA_shader_float_zero_wins or something, if we don't stick
the sqrt stuff into it.

Here is a software model of the Tesla-era shader execution created by
Marcin (mwk):

https://github.com/envytools/envytools/blob/master/nvhw/fp.c#L168

The bit in question is "zero_wins", so just look at what that
modifies. So if you have a*b and a || b == 0 (i.e. they are +0 or -0)
then +0 is returned.

>
> It is a link error to link a program in which some shaders have the
> extension and others don't.
>
> There's funny interactions like having compute shaders with dx9 float
> semantics, but that's pretty much it :)
>
> Somebody just needs to write up a draft, but it only makes sense if we can
> at least get all the Mesa drivers and Wine behind this. If there's a simple
> global flag on NVidia hardware, then it should be easy to provide an initial
> implementation for nouveau *hint* *hint* ;)
>
> For radeonsi, more work is required (like support in LLVM).

All sounds good to me. Should be relatively straightforward for nouveau.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/22] i965/fs: fix dst stride in IVB/VLV type conversions

2017-01-12 Thread Matt Turner
On Thu, Jan 5, 2017 at 5:07 AM, Samuel Iglesias Gonsálvez
 wrote:
> From: "Juan A. Suarez Romero" 
>
> When converting a DF to F, we set dst stride to 2, to fulfil alignment
> restrictions.
>
> But in IVB/VLV, this is not necessary, as each DF conversion already
> writes 2 F, the first one the real value, and the second one a 0. That
> is, IVB/VLV already set stride = 2 implicitly, so we must set it to 1
> explicitly to avoid ending up with stride = 4.
> ---
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 10 ++
>  1 file changed, 10 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index ac2d8ad..6967584 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -1634,6 +1634,16 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> dispatch_width)
> inst->src[i].type != BRW_REGISTER_TYPE_UD ||
> !inst->src[i].negate);
>}
> +  /* When converting from DF->F, we set destination's stride as 2 as an
> +   * aligment requirement. But in IVB/VLV, each DF implicitly writes 2 F,

Typo: alignment
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] anv: generate entry points from vk.xml

2017-01-12 Thread Jason Ekstrand
I made a quick comment below.  With that, this patch is

Reviewed-by: Jason Ekstrand 

Assuming you pulled 1 and 2 from master of the public github repo for
Khronos, they are

Acked-by: Jason Ekstrand 

On Thu, Jan 12, 2017 at 1:43 PM, Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> Signed-off-by: Lionel Landwerlin 
> ---
>  src/intel/vulkan/Makefile.am|  15 ++--
>  src/intel/vulkan/anv_entrypoints_gen.py | 125
> +---
>  2 files changed, 71 insertions(+), 69 deletions(-)
>
> diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
> index df7645fb13..d32b57f267 100644
> --- a/src/intel/vulkan/Makefile.am
> +++ b/src/intel/vulkan/Makefile.am
> @@ -23,11 +23,6 @@ include Makefile.sources
>
>  vulkan_includedir = $(includedir)/vulkan
>
> -vulkan_include_HEADERS = \
> -   $(top_srcdir)/include/vulkan/vk_platform.h \
> -   $(top_srcdir)/include/vulkan/vulkan.h \
> -   $(top_srcdir)/include/vulkan/vulkan_intel.h
> -
>  lib_LTLIBRARIES = libvulkan_intel.la
>
>  check_LTLIBRARIES = libvulkan-test.la
> @@ -138,12 +133,14 @@ VULKAN_LIB_DEPS += \
>  nodist_EXTRA_libvulkan_intel_la_SOURCES = dummy.cpp
>  libvulkan_intel_la_SOURCES = $(VULKAN_GEM_FILES)
>
> -anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
> -   $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
> +vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml
> +
> +anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_api_xml)
> +   $(AM_V_GEN) cat $(vulkan_api_xml) |\
> $(PYTHON2) $(srcdir)/anv_entrypoints_gen.py header > $@
>
> -anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
> -   $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
> +anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_api_xml)
> +   $(AM_V_GEN) cat $(vulkan_api_xml) |\
> $(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
>
>  BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
> diff --git a/src/intel/vulkan/anv_entrypoints_gen.py
> b/src/intel/vulkan/anv_entrypoints_gen.py
> index 9d23dbb358..e303b7e8e8 100644
> --- a/src/intel/vulkan/anv_entrypoints_gen.py
> +++ b/src/intel/vulkan/anv_entrypoints_gen.py
> @@ -23,13 +23,7 @@
>  #
>
>  import fileinput, re, sys
> -
> -# Each function typedef in the vulkan.h header is all on one line and
> matches
> -# this regepx. We hope that won't change.
> -
> -p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)?
> *\*PFN_vk([^(]*)\)(.*);')
> -
> -entrypoints = []
> +import xml.etree.ElementTree as ET
>
>  # We generate a static hash table for entry point lookup
>  # (vkGetProcAddress). We use a linear congruential generator for our hash
> @@ -51,29 +45,11 @@ def hash(name):
>
>  return h
>
> -def get_platform_guard_macro(name):
> -if "Xlib" in name:
> -return "VK_USE_PLATFORM_XLIB_KHR"
> -elif "Xcb" in name:
> -return "VK_USE_PLATFORM_XCB_KHR"
> -elif "Wayland" in name:
> -return "VK_USE_PLATFORM_WAYLAND_KHR"
> -elif "Mir" in name:
> -return "VK_USE_PLATFORM_MIR_KHR"
> -elif "Android" in name:
> -return "VK_USE_PLATFORM_ANDROID_KHR"
> -elif "Win32" in name:
> -return "VK_USE_PLATFORM_WIN32_KHR"
> -else:
> -return None
> -
> -def print_guard_start(name):
> -guard = get_platform_guard_macro(name)
> +def print_guard_start(guard):
>  if guard is not None:
>  print "#ifdef {0}".format(guard)
>
> -def print_guard_end(name):
> -guard = get_platform_guard_macro(name)
> +def print_guard_end(guard):
>  if guard is not None:
>  print "#endif // {0}".format(guard)
>
> @@ -87,18 +63,48 @@ elif (sys.argv[1] == "code"):
>  opt_code = True
>  sys.argv.pop()
>
> -# Parse the entry points in the header
> -
> -i = 0
> -for line in fileinput.input():
> -m  = p.match(line)
> -if (m):
> -if m.group(2) == 'VoidFunction':
> -continue
> -fullname = "vk" + m.group(2)
> -h = hash(fullname)
> -entrypoints.append((m.group(1), m.group(2), m.group(3), i, h))
> -i = i + 1
> +# Extract the entry points from the registry
> +def get_entrypoints(doc, entrypoints_to_defines):
> +i = 0
> +entrypoints = []
> +commands = doc.findall('./commands/command')
> +for command in commands:
>

If you use Python's built-in "enumerate", you can avoid incrementing "i".  Just do

for i, command in enumerate(commands):


> +type = command.find('./proto/type').text
> +name = command.find('./proto/name').text[2:]
> +params = map(lambda p: "".join(p.itertext()),
> command.findall('./param'))
> +params = ', '.join(params)
> +if name in entrypoints_to_defines:
> +guard = entrypoints_to_defines[name]
> +else:
> +guard = None
> +entrypoints.append((type, name, params, i, hash(name), guard))
> +i += 1
> +return 

Re: [Mesa-dev] [PATCH 02/22] i965/fs: add helper to retrieve instruction data size

2017-01-12 Thread Matt Turner
On Thu, Jan 5, 2017 at 5:07 AM, Samuel Iglesias Gonsálvez
 wrote:
> From: "Juan A. Suarez Romero" 
>
> The execution data size is the biggest type size of any instruction
> operand.
>
> We will use it to know if the instruction deals with DF, because in Ivy
> we need to duplicate the execution size and regioning parameters.
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp  | 19 ++-
>  src/mesa/drivers/dri/i965/brw_ir_fs.h |  1 +
>  2 files changed, 15 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index c8a0693..eb3b4aa 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -340,6 +340,19 @@ fs_inst::has_source_and_destination_hazard() const
> }
>  }
>
> +unsigned
> +fs_inst::exec_data_size() const
> +{
> +  unsigned exec_data_size = 0;
> +
> +  for (int i = 0; i < this->sources; i++) {
> +if (this->src[i].type != BAD_FILE)
> +  exec_data_size = MAX2(exec_data_size, type_sz(this->src[i].type));

Indentation is wrong (looks like 2 spaces, but should be 3 spaces). Also
please use braces in nested control flow.

> +  }
> +
> +  return exec_data_size;
> +}
> +
>  bool
>  fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
>  {
> @@ -4577,11 +4590,7 @@ get_fpu_lowered_simd_width(const struct 
> gen_device_info *devinfo,
> !inst->force_writemask_all) {
>const unsigned channels_per_grf = inst->exec_size /
>   DIV_ROUND_UP(inst->size_written, REG_SIZE);
> -  unsigned exec_type_size = 0;
> -  for (int i = 0; i < inst->sources; i++) {
> - if (inst->src[i].file != BAD_FILE)
> -exec_type_size = MAX2(exec_type_size, 
> type_sz(inst->src[i].type));
> -  }
> +  unsigned exec_type_size = inst->exec_data_size();
>assert(exec_type_size);
>
>/* The hardware shifts exactly 8 channels per compressed half of the
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h 
> b/src/mesa/drivers/dri/i965/brw_ir_fs.h
> index cad3712..9875f2d 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
> @@ -349,6 +349,7 @@ public:
> bool can_change_types() const;
> bool has_side_effects() const;
> bool has_source_and_destination_hazard() const;
> +   unsigned exec_data_size() const;

In the reworked patch in your new branch, this still exists even
though the function is no longer a member.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Nicolai Hähnle

On 12.01.2017 23:46, Ilia Mirkin wrote:

On Thu, Jan 12, 2017 at 4:03 PM, Matteo Bruni  wrote:

So, what would be really nice to have is a GLSL extension for some
kind of switch to select the requested behavior WRT NaN. For example a
three-way option with "don't generate NaN in arithmetic operations",
"do generate NaN" and "don't care". It could also be a GL state if
that's easier to implement with the existing hardware, since an
individual application isn't supposed to require different behavior
from one shader to the next.

Is anyone interested in / favorable to something like this? It would
solve the issue with defining NaN behavior in GLSL while making things
a bit more compatible with "other API a lot of games are ported from
which happens to be supported by all the desktop GPUs".


Not that I'm biased, but on the NVIDIA Tesla series (G80-GT21x), this
enable is handled via a global flag, not in the shader binary, so this
is all-or-nothing for a whole pipeline. On GF100+, I believe there is
also an enable via a global flag, but there is also a FMUL.FMZ (and
FFMA.FMZ) flag, which I *think* has the same effect. So for GF100+ hw,
this could be done at the instruction level.


Well, I would also have advocated for what is effectively a 
per-program/pipeline flag anyway, even though GCN hardware can 
theoretically do it per-instruction. Tracking a per-instruction bit in 
the compiler quickly becomes fragile (e.g. there's no good way for us to 
model this information per-instruction in LLVM IR). Per-shader isn't any 
better than per-instruction due to linking, and per-shader-stage is 
awkward if we ever want to do fancier cross-stage optimizations.


It's really quite simple. Introduce an extension with a name like 
MESA_shader_float_dx9. The behavior I'd suggest is:


Enabling/requiring the extension in a shader causes various semantics 
changes to bring floating point behavior in line with DX9 in that 
shader's code:


- 0*x = 0
- sqrt/rsqrt are guaranteed to take the absolute value of their argument
- anything else?

It is a link error to link a program in which some shaders have the 
extension and others don't.


There are funny interactions like having compute shaders with dx9 float 
semantics, but that's pretty much it :)


Somebody just needs to write up a draft, but it only makes sense if we 
can at least get all the Mesa drivers and Wine behind this. If there's a 
simple global flag on NVidia hardware, then it should be easy to provide 
an initial implementation for nouveau *hint* *hint* ;)


For radeonsi, more work is required (like support in LLVM).

Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97067] WebGL: conformance/glsl/misc/shaders-with-invariance.html Fail

2017-01-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97067

--- Comment #3 from Luke  ---
Tests still failing on 13.0 with Arch

$ uname -a
Linux arch 4.8.13-1-ARCH #1 SMP PREEMPT Fri Dec 9 07:24:34 CET 2016 x86_64
GNU/Linux

$ glxinfo |grep -i opengl
OpenGL vendor string: nouveau
OpenGL renderer string: Gallium 0.4 on NVE7
OpenGL core profile version string: 4.3 (Core Profile) Mesa 13.0.3

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Matteo Bruni
2017-01-12 23:41 GMT+01:00 Axel Davy :
>> Do you refer to the d3d9 MAD or the hardware instruction? If the
>> former, just generating MUL and ADD separately should do the trick. In
>> the latter case, I guess that means the "NaN switch" should also
>> affect code generation (although I think that should be already
>> covered by the "precise" qualifier.)
>>
> all radeon cards released so far have special instructions for both mul and
> mad to have 0*inf = 0.
>
>
> I guess you'd need some gl extension to use them if available.

I see. Does it need to be a separate extension though? I.e. isn't it
enough if the driver uses those special instructions when we don't
want NaN and use the normal instructions otherwise?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Ilia Mirkin
On Thu, Jan 12, 2017 at 4:03 PM, Matteo Bruni  wrote:
> So, what would be really nice to have is a GLSL extension for some
> kind of switch to select the requested behavior WRT NaN. For example a
> three-way option with "don't generate NaN in arithmetic operations",
> "do generate NaN" and "don't care". It could also be a GL state if
> that's easier to implement with the existing hardware, since an
> individual application isn't supposed to require different behavior
> from one shader to the next.
>
> Is anyone interested in / favorable to something like this? It would
> solve the issue with defining NaN behavior in GLSL while making things
> a bit more compatible with "other API a lot of games are ported from
> which happens to be supported by all the desktop GPUs".

Not that I'm biased, but on the NVIDIA Tesla series (G80-GT21x), this
enable is handled via a global flag, not in the shader binary, so this
is all-or-nothing for a whole pipeline. On GF100+, I believe there is
also an enable via a global flag, but there is also a FMUL.FMZ (and
FFMA.FMZ) flag, which I *think* has the same effect. So for GF100+ hw,
this could be done at the instruction level.

Cheers,

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/32] [v2] gbm: Export a getter for per plane handles

2017-01-12 Thread Ben Widawsky

On 17-01-12 14:22:09, Jason Ekstrand wrote:

On Thu, Jan 12, 2017 at 10:57 AM, Daniel Stone  wrote:


Hi,

On 5 January 2017 at 04:43, Ben Widawsky  wrote:
> diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_
dri.c
> index c61d56b44a..f9c1afd8cb 100644
> --- a/src/gbm/backends/dri/gbm_dri.c
> +++ b/src/gbm/backends/dri/gbm_dri.c
> @@ -622,6 +622,33 @@ gbm_dri_bo_get_planes(struct gbm_bo *_bo)
> return get_number_planes(dri, bo->image);
>  }
>
> +static union gbm_bo_handle
> +gbm_dri_bo_get_handle_for_plane(struct gbm_bo *_bo, int plane)
> +{
> +   struct gbm_dri_device *dri = gbm_dri_device(_bo->gbm);
> +   struct gbm_dri_bo *bo = gbm_dri_bo(_bo);
> +   union gbm_bo_handle ret;
> +   ret.s32 = -1;
> +
> +   if (!dri->image || dri->image->base.version < 13 ||
!dri->image->fromPlanar) {
> +  errno = ENOSYS;
> +  return ret;
> +   }
> +
> +   if (plane >= get_number_planes(dri, bo->image))
> +  return ret;

if (!bo->image) {
   ret.s32 = bo->handle;
   return ret;
}



I suggested EINVAL if plane is too large.  Would you rather do a default
thing?


They aren't the same thing. Daniel's is for cases where there is no
DRIimage (dumb BOs). Yours is for a bad plane. The two should be complementary.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Axel Davy

On 12/01/2017 23:09, Matteo Bruni wrote:

2017-01-12 22:54 GMT+01:00 Axel Davy :

Preventing NaN from being generated is not sufficient to fix the 0*inf = 0
issue.

For example radeonsi does convert all NaN to zeros via a hardware setting.

But 0*inf = 0 behaviour should be also in mad, and with the NaN to zero
conversion, you get 0 * inf + 24 = 0 instead of 24.

Do you refer to the d3d9 MAD or the hardware instruction? If the
former, just generating MUL and ADD separately should do the trick. In
the latter case, I guess that means the "NaN switch" should also
affect code generation (although I think that should be already
covered by the "precise" qualifier.)

all radeon cards released so far have special instructions for both mul 
and mad to have 0*inf = 0.



I guess you'd need some gl extension to use them if available.


Axel

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: add flt comparision simplification

2017-01-12 Thread Timothy Arceri
On Fri, 2017-01-13 at 08:49 +1100, Timothy Arceri wrote:
> Actually I might just try moving this to the late block as the reason
> for 'is_not_used_by_conditional' is to stop getting in the way of
> ffmas.

Ignore this; that just results in the opts doing nothing. The whole
point is to avoid ffmas when they are not making use of the conditional
modifier.

> 
> On Fri, 2017-01-13 at 08:29 +1100, Timothy Arceri wrote:
> > Didn't turn out as useful as I'd hoped, but it will help a lot more
> > on
> > i965 by reducing regressions when we drop
> > brw_do_channel_expressions()
> > and brw_do_vector_splitting().
> > 
> > I'm not sure how much sense 'is_not_used_by_conditional' makes on
> > platforms other than i965 but since this is a new opt it at least
> > won't do any harm.
> > 
> > shader-db BDW:
> > 
> > total instructions in shared programs: 13060410 -> 13060242 (-
> > 0.00%)
> > instructions in affected programs: 44880 -> 44712 (-0.37%)
> > helped: 86
> > HURT: 0
> > 
> > total cycles in shared programs: 256585692 -> 256584248 (-0.00%)
> > cycles in affected programs: 8060766 -> 8059322 (-0.02%)
> > helped: 177
> > HURT: 33
> > ---
> >  src/compiler/nir/nir_opt_algebraic.py |  4 
> >  src/compiler/nir/nir_search_helpers.h | 15 +++
> >  2 files changed, 19 insertions(+)
> > 
> > diff --git a/src/compiler/nir/nir_opt_algebraic.py
> > b/src/compiler/nir/nir_opt_algebraic.py
> > index 3c8318c..59f9e43 100644
> > --- a/src/compiler/nir/nir_opt_algebraic.py
> > +++ b/src/compiler/nir/nir_opt_algebraic.py
> > @@ -159,6 +159,10 @@ optimizations = [
> > # a != 0.0
> > (('flt', 0.0, ('fabs', a)), ('fne', a, 0.0)),
> >  
> > +   # ignore this opt when the result is used by a bcsel or if so
> > we
> > can make
> > +   # use of conditional modifiers on supported hardware.
> > +   (('flt(is_not_used_by_conditional)', ('fadd(is_used_once)', a,
> > ('fneg', b)), 0.0), ('flt', a, b)),
> > +
> > (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
> > (('bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
> > (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
> > diff --git a/src/compiler/nir/nir_search_helpers.h
> > b/src/compiler/nir/nir_search_helpers.h
> > index ddaff52..05bd317 100644
> > --- a/src/compiler/nir/nir_search_helpers.h
> > +++ b/src/compiler/nir/nir_search_helpers.h
> > @@ -158,4 +158,19 @@ is_not_used_by_if(nir_alu_instr *instr)
> > return list_empty(>dest.dest.ssa.if_uses);
> >  }
> >  
> > +static inline bool
> > +is_not_used_by_conditional(nir_alu_instr *instr)
> > +{
> > +   if (!is_not_used_by_if(instr))
> > +  return false;
> > +
> > +   nir_foreach_use(use, >dest.dest.ssa) {
> > +  if (use->parent_instr->type == nir_instr_type_alu &&
> > +  nir_instr_as_alu(use->parent_instr)->op == nir_op_bcsel)
> > + return false;
> > +   }
> > +
> > +   return true;
> > +}
> > +
> >  #endif /* _NIR_SEARCH_ */
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] main/fbobject: return if it is color_attachment on get_attachment

2017-01-12 Thread Alejandro Piñeiro
Some callers need that info to know whether they should raise
INVALID_ENUM or INVALID_OPERATION. An alternative would be for the caller
to check whether the attachment is a GL_COLOR_ATTACHMENTm, but that seems
redundant as get_attachment is already doing that.
---
 src/mesa/main/fbobject.c | 30 +++---
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 26fc15d..ce5eeae 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -222,15 +222,21 @@ get_framebuffer_target(struct gl_context *ctx, GLenum 
target)
  * default / window-system FB object.
  * If \p attachment is GL_DEPTH_STENCIL_ATTACHMENT, return a pointer to
  * the depth buffer attachment point.
+ * Returns if the attachment is a GL_COLOR_ATTACHMENTm_EXT on
+ * is_color_attachment, because several callers would return different errors
+ * if they don't find the attachment.
  */
 static struct gl_renderbuffer_attachment *
 get_attachment(struct gl_context *ctx, struct gl_framebuffer *fb,
-   GLenum attachment)
+   GLenum attachment, bool *is_color_attachment)
 {
GLuint i;
 
assert(_mesa_is_user_fbo(fb));
 
+   if (is_color_attachment)
+  *is_color_attachment = false;
+
switch (attachment) {
case GL_COLOR_ATTACHMENT0_EXT:
case GL_COLOR_ATTACHMENT1_EXT:
@@ -248,6 +254,8 @@ get_attachment(struct gl_context *ctx, struct 
gl_framebuffer *fb,
case GL_COLOR_ATTACHMENT13_EXT:
case GL_COLOR_ATTACHMENT14_EXT:
case GL_COLOR_ATTACHMENT15_EXT:
+  if (is_color_attachment)
+ *is_color_attachment = true;
   /* Only OpenGL ES 1.x forbids color attachments other than
* GL_COLOR_ATTACHMENT0.  For all other APIs the limit set by the
* hardware is used.
@@ -543,13 +551,13 @@ _mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx,
 
mtx_lock(>Mutex);
 
-   att = get_attachment(ctx, fb, attachment);
+   att = get_attachment(ctx, fb, attachment, NULL);
assert(att);
if (rb) {
   set_renderbuffer_attachment(ctx, att, rb);
   if (attachment == GL_DEPTH_STENCIL_ATTACHMENT) {
  /* do stencil attachment here (depth already done above) */
- att = get_attachment(ctx, fb, GL_STENCIL_ATTACHMENT_EXT);
+ att = get_attachment(ctx, fb, GL_STENCIL_ATTACHMENT_EXT, NULL);
  assert(att);
  set_renderbuffer_attachment(ctx, att, rb);
   }
@@ -559,7 +567,7 @@ _mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx,
   remove_attachment(ctx, att);
   if (attachment == GL_DEPTH_STENCIL_ATTACHMENT) {
  /* detach stencil (depth was detached above) */
- att = get_attachment(ctx, fb, GL_STENCIL_ATTACHMENT_EXT);
+ att = get_attachment(ctx, fb, GL_STENCIL_ATTACHMENT_EXT, NULL);
  assert(att);
  remove_attachment(ctx, att);
   }
@@ -1219,7 +1227,7 @@ _mesa_test_framebuffer_completeness(struct gl_context 
*ctx,
   for (j = 0; j < ctx->Const.MaxDrawBuffers; j++) {
  if (fb->ColorDrawBuffer[j] != GL_NONE) {
 const struct gl_renderbuffer_attachment *att
-   = get_attachment(ctx, fb, fb->ColorDrawBuffer[j]);
+   = get_attachment(ctx, fb, fb->ColorDrawBuffer[j], NULL);
 assert(att);
 if (att->Type == GL_NONE) {
fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_EXT;
@@ -1232,7 +1240,7 @@ _mesa_test_framebuffer_completeness(struct gl_context 
*ctx,
   /* Check that the ReadBuffer is present */
   if (fb->ColorReadBuffer != GL_NONE) {
  const struct gl_renderbuffer_attachment *att
-= get_attachment(ctx, fb, fb->ColorReadBuffer);
+= get_attachment(ctx, fb, fb->ColorReadBuffer, NULL);
  assert(att);
  if (att->Type == GL_NONE) {
 fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER_EXT;
@@ -3151,7 +3159,7 @@ _mesa_framebuffer_texture(struct gl_context *ctx, struct 
gl_framebuffer *fb,
}
 
/* Not a hash lookup, so we can afford to get the attachment here. */
-   att = get_attachment(ctx, fb, attachment);
+   att = get_attachment(ctx, fb, attachment, NULL);
if (att == NULL) {
   _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller,
   _mesa_enum_to_string(attachment));
@@ -3495,7 +3503,7 @@ framebuffer_renderbuffer(struct gl_context *ctx,
   return;
}
 
-   att = get_attachment(ctx, fb, attachment);
+   att = get_attachment(ctx, fb, attachment, NULL);
if (att == NULL) {
   _mesa_error(ctx, GL_INVALID_ENUM,
   "%s(invalid attachment %s)", func,
@@ -3668,7 +3676,7 @@ _mesa_get_framebuffer_attachment_parameter(struct 
gl_context *ctx,
}
else {
   /* user-created framebuffer FBO */
-  att = get_attachment(ctx, buffer, attachment);
+  att = get_attachment(ctx, buffer, attachment, NULL);
}
 
if (att == NULL) {
@@ -3692,8 +3700,8 @@ 

[Mesa-dev] [PATCH 2/2] main/fbobject: throw invalid operation when get_attachment fails if needed

2017-01-12 Thread Alejandro Piñeiro
In most cases, if a call to get_attachment fails, it is because the
attachment is an INVALID_ENUM. But for some specific cases, if COLOR_ATTACHMENTm
(where m >= MAX_COLOR_ATTACHMENTS) is used, it should raise an
INVALID_OPERATION error instead.

Fixes:
GL45-CTS.direct_state_access.framebuffers_get_attachment_parameter_errors
GL45-CTS.direct_state_access.framebuffers_renderbuffer_attachment_errors
---

The code could be simpler if we did something like this:

   err = is_color_attachment ? GL_INVALID_OPERATION : GL_INVALID_ENUM;
   _mesa_error(...)

Although that would mean having the same error message for both
cases. Not a big deal. I'm just slightly biased to differentiate a little.


 src/mesa/main/fbobject.c | 48 +---
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index ce5eeae..871d08c 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3495,6 +3495,7 @@ framebuffer_renderbuffer(struct gl_context *ctx,
  const char *func)
 {
struct gl_renderbuffer_attachment *att;
+   bool is_color_attachment;
 
if (_mesa_is_winsys_fbo(fb)) {
   /* Can't attach new renderbuffers to a window system framebuffer */
@@ -3503,11 +3504,28 @@ framebuffer_renderbuffer(struct gl_context *ctx,
   return;
}
 
-   att = get_attachment(ctx, fb, attachment, NULL);
+   att = get_attachment(ctx, fb, attachment, &is_color_attachment);
if (att == NULL) {
-  _mesa_error(ctx, GL_INVALID_ENUM,
-  "%s(invalid attachment %s)", func,
-  _mesa_enum_to_string(attachment));
+  /*
+   * From OpenGL 4.5 spec, section 9.2.7 "Attaching Renderbuffer Images to
+   * a Framebuffer":
+   *"An INVALID_OPERATION error is generated if attachment is COLOR_-
+   * ATTACHMENTm where m is greater than or equal to the value of
+   * MAX_COLOR_- ATTACHMENTS ."
+   *
+   * If we are at this point, is because the attachment is not valid, so
+   * if is_color_attachment is true, is because of the previous reason.
+   */
+  if (is_color_attachment) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(invalid attachment %s)", func,
+ _mesa_enum_to_string(attachment));
+  } else {
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "%s(invalid attachment %s)", func,
+ _mesa_enum_to_string(attachment));
+  }
+
   return;
}
 
@@ -3609,6 +3627,7 @@ _mesa_get_framebuffer_attachment_parameter(struct 
gl_context *ctx,
GLint *params, const char *caller)
 {
const struct gl_renderbuffer_attachment *att;
+   bool is_color_attachment;
GLenum err;
 
/* The error code for an attachment type of GL_NONE differs between APIs.
@@ -3676,12 +3695,27 @@ _mesa_get_framebuffer_attachment_parameter(struct 
gl_context *ctx,
}
else {
   /* user-created framebuffer FBO */
-  att = get_attachment(ctx, buffer, attachment, NULL);
+  att = get_attachment(ctx, buffer, attachment, &is_color_attachment);
}
 
if (att == NULL) {
-  _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller,
-  _mesa_enum_to_string(attachment));
+  /*
+   * From OpenGL 4.5 spec, section 9.2.3 "Framebuffer Object Queries":
+   *
+   *"An INVALID_OPERATION error is generated if a framebuffer object
+   * is bound to target and attachment is COLOR_ATTACHMENTm where m is
+   * greater than or equal to the value of MAX_COLOR_ATTACHMENTS."
+   *
+   * If we are at this point, is because the attachment is not valid, so
+   * if is_color_attachment is true, is because of the previous reason.
+   */
+  if (is_color_attachment) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid color attachment 
%s)",
+ caller, _mesa_enum_to_string(attachment));
+  } else {
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller,
+ _mesa_enum_to_string(attachment));
+  }
   return;
}
 
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/32] [v2] gbm: Export a getter for per plane handles

2017-01-12 Thread Jason Ekstrand
On Thu, Jan 12, 2017 at 10:57 AM, Daniel Stone  wrote:

> Hi,
>
> On 5 January 2017 at 04:43, Ben Widawsky  wrote:
> > diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_
> dri.c
> > index c61d56b44a..f9c1afd8cb 100644
> > --- a/src/gbm/backends/dri/gbm_dri.c
> > +++ b/src/gbm/backends/dri/gbm_dri.c
> > @@ -622,6 +622,33 @@ gbm_dri_bo_get_planes(struct gbm_bo *_bo)
> > return get_number_planes(dri, bo->image);
> >  }
> >
> > +static union gbm_bo_handle
> > +gbm_dri_bo_get_handle_for_plane(struct gbm_bo *_bo, int plane)
> > +{
> > +   struct gbm_dri_device *dri = gbm_dri_device(_bo->gbm);
> > +   struct gbm_dri_bo *bo = gbm_dri_bo(_bo);
> > +   union gbm_bo_handle ret;
> > +   ret.s32 = -1;
> > +
> > +   if (!dri->image || dri->image->base.version < 13 ||
> !dri->image->fromPlanar) {
> > +  errno = ENOSYS;
> > +  return ret;
> > +   }
> > +
> > +   if (plane >= get_number_planes(dri, bo->image))
> > +  return ret;
>
> if (!bo->image) {
>ret.s32 = bo->handle;
>return ret;
> }


I suggested EINVAL if plane is too large.  Would you rather do a default
thing?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Fix textureGather with RG32I/UI on Gen7.

2017-01-12 Thread Jordan Justen
On 2017-01-05 02:51:38, Kenneth Graunke wrote:
> According to the "Gather4 R32G32_FLOAT Bug" internal documentation
> page, the R32G32_UINT and R32G32_SINT formats are affected by the
> same bug as R32G32_FLOAT.  Applying the same workarounds should be
> viable - apparently the R32G32_FLOAT_LD format shouldn't corrupt
> integer data which is NaN or other sketchy floating point values.
> 
> One irritating caveat is that, because it's a FLOAT format, the
> alpha channel or any set to SCS_ONE return 0x3f800000 (1.0) rather than
> integer 1.  So we need shader code to whack those channels to 1.
> 
> Fixes GL45-CTS.texture_gather.plain-gather-int-cube-rg on Haswell.
> 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_wm.c   | 41 
> 
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  4 ++-
>  2 files changed, 37 insertions(+), 8 deletions(-)
> 
> Technically the brw_wm_surface_state.c hunk is enough to pass this test.
> 
> Asking for a dead channel (alpha) or constant value (one) with texture
> gather (which only gives you one channel's worth of data) is fairly
> pointless, so I don't expect anyone would actually do it...but, I tried
> to fix it with the brw_wm.c hunks.  Probably needs tests...
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
> b/src/mesa/drivers/dri/i965/brw_wm.c
> index b0cd1635521..024e948391e 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -353,13 +353,40 @@ brw_populate_sampler_prog_key_data(struct gl_context 
> *ctx,
> key->gl_clamp_mask[2] |= 1 << s;
>   }
>  
> - /* gather4's channel select for green from RG32F is broken; requires
> -  * a shader w/a on IVB; fixable with just SCS on HSW.
> -  */
> - if (brw->gen == 7 && !brw->is_haswell &&
> - prog->nir->info->uses_texture_gather) {
> -if (img->InternalFormat == GL_RG32F)
> -   key->gather_channel_quirk_mask |= 1 << s;
> + /* gather4 for RG32* is broken in multiple ways on Gen7. */
> + if (brw->gen == 7 && prog->nir->info->uses_texture_gather) {
> +switch (img->InternalFormat) {
> +case GL_RG32I:
> +case GL_RG32UI: {
> +   /* We have to override the format to R32G32_FLOAT_LD.
> +* This means that SCS_ALPHA and SCS_ONE will return 0x3f800000
> +* (1.0) rather than integer 1.  This needs shader hacks.
> +*
> +* On Ivybridge, we whack W (alpha) to ONE in our key's
> +* swizzle.  On Haswell, we look at the original texture
> +* swizzle, and use XYZW with channels overridden to ONE,
> +* leaving normal texture swizzling to SCS.
> +*/
> +   unsigned src_swizzle =
> +  brw->is_haswell ? t->_Swizzle : key->swizzles[s];
> +   for (int i = 0; i < 4; i++) {
> +  unsigned src_comp = GET_SWZ(src_swizzle, i);
> +  if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) {
> + key->swizzles[i] &= 0x7 << (3 * i);

Looks like the rhs should be inverted.

Reviewed-by: Jordan Justen 

> + key->swizzles[i] |= SWIZZLE_ONE << (3 * i);
> +  }
> +   }
> +   /* fallthrough */
> +}
> +case GL_RG32F:
> +   /* The channel select for green doesn't work - we have to
> +* request blue.  Haswell can use SCS for this, but Ivybridge
> +* needs a shader workaround.
> +*/
> +   if (!brw->is_haswell)
> +  key->gather_channel_quirk_mask |= 1 << s;
> +   break;
> +}
>   }
>  
>   /* Gen6's gather4 is broken for UINT/SINT; we treat them as
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 6c44381be13..7a99aa434bf 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -565,7 +565,9 @@ brw_update_texture_surface(struct gl_context *ctx,
>/* Implement gen6 and gen7 gather work-around */
>bool need_green_to_blue = false;
>if (for_gather) {
> - if (brw->gen == 7 && format == BRW_SURFACEFORMAT_R32G32_FLOAT) {
> + if (brw->gen == 7 && (format == BRW_SURFACEFORMAT_R32G32_FLOAT ||
> +   format == BRW_SURFACEFORMAT_R32G32_SINT ||
> +   format == BRW_SURFACEFORMAT_R32G32_UINT)) {
>  format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
>  need_green_to_blue = brw->is_haswell;
>   } else if (brw->gen == 6) {
> -- 
> 2.11.0
> 
> ___
> mesa-dev mailing list

Re: [Mesa-dev] [PATCH] nir/gcm: Rework the schedule late loop

2017-01-12 Thread Jason Ekstrand
On Thu, Jan 12, 2017 at 12:38 PM, Matt Turner  wrote:

> On Thu, Dec 1, 2016 at 1:51 PM, Jason Ekstrand 
> wrote:
> > This fixes a bug in code motion that occurred when the best block is the
> > same as the schedule early block.  In this case, because we're checking
> > (lca != def->parent_instr->block) at the top of the loop, we never get to
> > the check for loop depth so we wouldn't move it out of the loop.  This
> > commit reworks the loop to be a simple for loop up the dominator chain
> and
> > we place the (lca != def->parent_instr->block) check at the end of the
> > loop.
> > ---
> >  src/compiler/nir/nir_opt_gcm.c | 11 ++-
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_
> gcm.c
> > index 77eb8e6..9d7f59c 100644
> > --- a/src/compiler/nir/nir_opt_gcm.c
> > +++ b/src/compiler/nir/nir_opt_gcm.c
> > @@ -326,12 +326,13 @@ gcm_schedule_late_def(nir_ssa_def *def, void
> *void_state)
> >  * as far outside loops as we can get.
>
> In this comment, the first line says "know" instead of "now". Please
> fix that while we're here.
>

Thanks.  I'll fix that up.


> >  */
> > nir_block *best = lca;
> > -   while (lca != def->parent_instr->block) {
> > -  assert(lca);
> > -  if (state->blocks[lca->index].loop_depth <
> > +   for (nir_block *block = lca; block != NULL; block = block->imm_dom) {
> > +  if (state->blocks[block->index].loop_depth <
> >state->blocks[best->index].loop_depth)
> > - best = lca;
> > -  lca = lca->imm_dom;
> > + best = block;
> > +
> > +  if (block == def->parent_instr->block)
> > + break;
> > }
>
> I had to come up with an example and walk through it to understand the
> issue, but I now understand and agree that this is the correct fix.
>
> This patch is
>
> Reviewed-by: Matt Turner 
>

Thanks!  I'd almost forgotten about this patch.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Matteo Bruni
2017-01-12 22:54 GMT+01:00 Axel Davy :
>
> Preventing NaN from being generated is not sufficient to fix the 0*inf = 0
> issue.
>
> For example radeonsi does convert all NaN to zeros via a hardware setting.
>
> But 0*inf = 0 behaviour should be also in mad, and with the NaN to zero
> conversion, you get 0 * inf + 24 = 0 instead of 24.

Do you refer to the d3d9 MAD or the hardware instruction? If the
former, just generating MUL and ADD separately should do the trick. In
the latter case, I guess that means the "NaN switch" should also
affect code generation (although I think that should be already
covered by the "precise" qualifier.)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Matteo Bruni
2017-01-12 22:25 GMT+01:00 Roland Scheidegger :
> Is there actually a formal requirement that d3d9 hw never generates
> NaNs? I think d3d9 is very lacking in spec there - if that is specified
> somewhere I've never seen it... Maybe just everybody is expecting no
> NaNs there too (because earlier hw couldn't do it). I think some hw
> couldn't do infinity either.

Right, I don't think there is any clear requirement in d3d9. Probably
applications at some point just started to depend (knowingly or not)
on not getting NaNs and later on the d3d9 drivers for newer hardware
which supported NaN had to preserve the established behavior to avoid
breaking them.

d3d9 and below don't have much of an actual spec though, similar
patterns of formally unspecified behavior actually requiring a
specific handling happen a lot so it doesn't seem particularly
surprising to me. Off the top of my head, there's the behavior when
sampling from a texture unit with no texture bound but if you're
interested you can just look at the d3d9 tests in Wine to find a bunch
more...

> In any case, being able to select NaN behavior looks potentially quite
> useful to me.
>
> Roland
>
>
>> Cheers,
>> Matteo Bruni.
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/32] [v2] Renderbuffer Decompression (and GBM modifiers)

2017-01-12 Thread Ben Widawsky

On 17-01-12 14:44:33, Daniel Stone wrote:

Hi Ben,

On 3 January 2017 at 02:36, Ben Widawsky  wrote:

Same as v1 with the requested fixes and tags added. v1 is here:
https://lists.freedesktop.org/archives/intel-gfx/2016-December/113639.html

I haven't yet updated kmscube to use Kristian's GET_PLANE2, but I will be doing
it this week, I hope. Now all patches but the last should be safe to merge
without the kernel actually defining, or supporting the new fourcc codes.


A couple of things I noticed whilst looking through: the new GBM
entrypoints all seem to leak the DRIimage, as noted (except I forgot
get_modifier too).


I don't think get_modifier has a leak - it doesn't do fromPlanar because we made
the decision (and it's already merged) that the modifier must be the same for
all planes. Unless you're thinking of something else. I got all the other leaks,
thanks for spotting those; I feel stupid but I somehow thought the DRIimage that
got created was magically cleaned up.


Also, probably more importantly,  modifier 0 (linear) doesn't seem to be
handled; previously the LINEAR use flag accounted for this, so we should be
able to replace that by passing in a 0 modifier.



Yeah, before we started with GET_PLANE2 discussions, lack of modifier meant
linear, but now we have linear so I can handle that properly.


Daniel, I added your acked-by for all patches (even though I added a couple from
the original).


Fair enough; I have no real opinion on the later Intel-specific
patches, but if they work ...


I'll make sure to test it with your Weston branch at least to reciprocate.


Cheers,
Daniel


Thanks.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Axel Davy

On 12/01/2017 22:03, Matteo Bruni wrote:

2017-01-11 19:09 GMT+01:00 Jason Ekstrand :

Another reason why I'm not a huge fan is that there is some momentum in the
industry to make GLSL better defined with respect to NaN.  I don't know that
anything will ever come of it (because it may break apps) but if something
does, we may find ourselves having to make SQRT and RSQ NaN-correct in the
future and, hey look, it'll break apps.

Hijacking the thread to focus on this. What's the interest about
specifically (assuming that's something that can be talked about
publicly?)

I'm asking because in Wine we have been bitten many times by the GLSL
behavior WRT NaN and at the moment we really can't do anything
sensible about it in the general case. Many d3d9 applications depend
on not generating NaN in cases where e.g. IEEE 754 would expect to.
The RSQ (i.e. inversesqrt()) from the original thread is one of those,
but that's not really a problem for sane applications since it's
"defined" in d3d8/d3d9 as taking the absolute value of the operand and
indeed we translate RSQ to inversesqrt(abs(x)). The real issue is with
cases like 0*inf, which AFAIK on d3d9 are supposed to give 0 as
result.

Of course, that changes with d3d10+, which generally requires generating
and preserving NaN, as Roland already mentioned.

So, what would be really nice to have is a GLSL extension for some
kind of switch to select the requested behavior WRT NaN. For example a
three-way option with "don't generate NaN in arithmetic operations",
"do generate NaN" and "don't care". It could also be a GL state if
that's easier to implement with the existing hardware, since an
individual application isn't supposed to require different behavior
from one shader to the next.

Is anyone interested in / favorable to something like this? It would
solve the issue with defining NaN behavior in GLSL while making things
a bit more compatible with "other API a lot of games are ported from
which happens to be supported by all the desktop GPUs".


Preventing NaN from being generated is not sufficient to fix the 0*inf = 
0 issue.


For example radeonsi does convert all NaN to zeros via a hardware setting.

But 0*inf = 0 behaviour should be also in mad, and with the NaN to zero 
conversion, you get 0 * inf + 24 = 0 instead of 24.



Yours,


Axel Davy

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: add flt comparison simplification

2017-01-12 Thread Timothy Arceri
Actually I might just try moving this to the late block as the reason
for 'is_not_used_by_conditional' is to stop getting in the way of
ffmas.

On Fri, 2017-01-13 at 08:29 +1100, Timothy Arceri wrote:
> Didn't turn out as useful as I'd hoped, but it will help a lot more on
> i965 by reducing regressions when we drop
> brw_do_channel_expressions()
> and brw_do_vector_splitting().
> 
> I'm not sure how much sense 'is_not_used_by_conditional' makes on
> platforms other than i965 but since this is a new opt it at least
> won't do any harm.
> 
> shader-db BDW:
> 
> total instructions in shared programs: 13060410 -> 13060242 (-0.00%)
> instructions in affected programs: 44880 -> 44712 (-0.37%)
> helped: 86
> HURT: 0
> 
> total cycles in shared programs: 256585692 -> 256584248 (-0.00%)
> cycles in affected programs: 8060766 -> 8059322 (-0.02%)
> helped: 177
> HURT: 33
> ---
>  src/compiler/nir/nir_opt_algebraic.py |  4 
>  src/compiler/nir/nir_search_helpers.h | 15 +++
>  2 files changed, 19 insertions(+)
> 
> diff --git a/src/compiler/nir/nir_opt_algebraic.py
> b/src/compiler/nir/nir_opt_algebraic.py
> index 3c8318c..59f9e43 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -159,6 +159,10 @@ optimizations = [
> # a != 0.0
> (('flt', 0.0, ('fabs', a)), ('fne', a, 0.0)),
>  
> +   # ignore this opt when the result is used by a bcsel or if so we
> can make
> +   # use of conditional modifiers on supported hardware.
> +   (('flt(is_not_used_by_conditional)', ('fadd(is_used_once)', a,
> ('fneg', b)), 0.0), ('flt', a, b)),
> +
> (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
> (('bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
> (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
> diff --git a/src/compiler/nir/nir_search_helpers.h
> b/src/compiler/nir/nir_search_helpers.h
> index ddaff52..05bd317 100644
> --- a/src/compiler/nir/nir_search_helpers.h
> +++ b/src/compiler/nir/nir_search_helpers.h
> @@ -158,4 +158,19 @@ is_not_used_by_if(nir_alu_instr *instr)
> return list_empty(&instr->dest.dest.ssa.if_uses);
>  }
>  
> +static inline bool
> +is_not_used_by_conditional(nir_alu_instr *instr)
> +{
> +   if (!is_not_used_by_if(instr))
> +  return false;
> +
> +   nir_foreach_use(use, &instr->dest.dest.ssa) {
> +  if (use->parent_instr->type == nir_instr_type_alu &&
> +  nir_instr_as_alu(use->parent_instr)->op == nir_op_bcsel)
> + return false;
> +   }
> +
> +   return true;
> +}
> +
>  #endif /* _NIR_SEARCH_ */
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] anv: generate entry points from vk.xml

2017-01-12 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/vulkan/Makefile.am|  15 ++--
 src/intel/vulkan/anv_entrypoints_gen.py | 125 +---
 2 files changed, 71 insertions(+), 69 deletions(-)

diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
index df7645fb13..d32b57f267 100644
--- a/src/intel/vulkan/Makefile.am
+++ b/src/intel/vulkan/Makefile.am
@@ -23,11 +23,6 @@ include Makefile.sources
 
 vulkan_includedir = $(includedir)/vulkan
 
-vulkan_include_HEADERS = \
-   $(top_srcdir)/include/vulkan/vk_platform.h \
-   $(top_srcdir)/include/vulkan/vulkan.h \
-   $(top_srcdir)/include/vulkan/vulkan_intel.h
-
 lib_LTLIBRARIES = libvulkan_intel.la
 
 check_LTLIBRARIES = libvulkan-test.la
@@ -138,12 +133,14 @@ VULKAN_LIB_DEPS += \
 nodist_EXTRA_libvulkan_intel_la_SOURCES = dummy.cpp
 libvulkan_intel_la_SOURCES = $(VULKAN_GEM_FILES)
 
-anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
-   $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
+vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml
+
+anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_api_xml)
+   $(AM_V_GEN) cat $(vulkan_api_xml) |\
$(PYTHON2) $(srcdir)/anv_entrypoints_gen.py header > $@
 
-anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
-   $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
+anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_api_xml)
+   $(AM_V_GEN) cat $(vulkan_api_xml) |\
$(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
 
 BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
diff --git a/src/intel/vulkan/anv_entrypoints_gen.py 
b/src/intel/vulkan/anv_entrypoints_gen.py
index 9d23dbb358..e303b7e8e8 100644
--- a/src/intel/vulkan/anv_entrypoints_gen.py
+++ b/src/intel/vulkan/anv_entrypoints_gen.py
@@ -23,13 +23,7 @@
 #
 
 import fileinput, re, sys
-
-# Each function typedef in the vulkan.h header is all on one line and matches
-# this regepx. We hope that won't change.
-
-p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);')
-
-entrypoints = []
+import xml.etree.ElementTree as ET
 
 # We generate a static hash table for entry point lookup
 # (vkGetProcAddress). We use a linear congruential generator for our hash
@@ -51,29 +45,11 @@ def hash(name):
 
 return h
 
-def get_platform_guard_macro(name):
-if "Xlib" in name:
-return "VK_USE_PLATFORM_XLIB_KHR"
-elif "Xcb" in name:
-return "VK_USE_PLATFORM_XCB_KHR"
-elif "Wayland" in name:
-return "VK_USE_PLATFORM_WAYLAND_KHR"
-elif "Mir" in name:
-return "VK_USE_PLATFORM_MIR_KHR"
-elif "Android" in name:
-return "VK_USE_PLATFORM_ANDROID_KHR"
-elif "Win32" in name:
-return "VK_USE_PLATFORM_WIN32_KHR"
-else:
-return None
-
-def print_guard_start(name):
-guard = get_platform_guard_macro(name)
+def print_guard_start(guard):
 if guard is not None:
 print "#ifdef {0}".format(guard)
 
-def print_guard_end(name):
-guard = get_platform_guard_macro(name)
+def print_guard_end(guard):
 if guard is not None:
 print "#endif // {0}".format(guard)
 
@@ -87,18 +63,48 @@ elif (sys.argv[1] == "code"):
 opt_code = True
 sys.argv.pop()
 
-# Parse the entry points in the header
-
-i = 0
-for line in fileinput.input():
-m  = p.match(line)
-if (m):
-if m.group(2) == 'VoidFunction':
-continue
-fullname = "vk" + m.group(2)
-h = hash(fullname)
-entrypoints.append((m.group(1), m.group(2), m.group(3), i, h))
-i = i + 1
+# Extract the entry points from the registry
+def get_entrypoints(doc, entrypoints_to_defines):
+i = 0
+entrypoints = []
+commands = doc.findall('./commands/command')
+for command in commands:
+type = command.find('./proto/type').text
+name = command.find('./proto/name').text[2:]
+params = map(lambda p: "".join(p.itertext()), 
command.findall('./param'))
+params = ', '.join(params)
+if name in entrypoints_to_defines:
+guard = entrypoints_to_defines[name]
+else:
+guard = None
+entrypoints.append((type, name, params, i, hash(name), guard))
+i += 1
+return entrypoints
+
+# Maps entry points to extension defines
+def get_entrypoints_defines(doc):
+entrypoints_to_defines = {}
+extensions = doc.findall('./extensions/extension')
+for extension in extensions:
+define = extension.get('protect')
+entrypoints = extension.findall('./require/command')
+for entrypoint in entrypoints:
+name = entrypoint.get('name')[2:]
+entrypoints_to_defines[name] = define
+return entrypoints_to_defines
+
+doc = ET.parse(sys.stdin)
+entrypoints = get_entrypoints(doc, get_entrypoints_defines(doc))
+
+# Manually add CreateDmaBufImageINTEL for which we don't have an extension
+# defined.

[Mesa-dev] [PATCH 4/4] radv: generate entrypoints from vk.xml

2017-01-12 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/amd/vulkan/Makefile.am |  14 ++---
 src/amd/vulkan/radv_entrypoints_gen.py | 105 -
 2 files changed, 56 insertions(+), 63 deletions(-)

diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am
index 6e184c0922..ac9ccaf45d 100644
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -23,10 +23,6 @@ include Makefile.sources
 
 vulkan_includedir = $(includedir)/vulkan
 
-vulkan_include_HEADERS = \
-   $(top_srcdir)/include/vulkan/vk_platform.h \
-   $(top_srcdir)/include/vulkan/vulkan.h
-
 lib_LTLIBRARIES = libvulkan_radeon.la
 
 # The gallium includes are for the util/u_math.h include from main/macros.h
@@ -111,12 +107,14 @@ VULKAN_LIB_DEPS += \
 nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
 libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)
 
-radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
-   $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
+vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml
+
+radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_api_xml)
+   $(AM_V_GEN) cat $(vulkan_api_xml) |\
$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py header > $@
 
-radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
-   $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
+radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_api_xml)
+   $(AM_V_GEN) cat $(vulkan_api_xml) |\
$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py code > $@
 
 vk_format_table.c: vk_format_table.py \
diff --git a/src/amd/vulkan/radv_entrypoints_gen.py 
b/src/amd/vulkan/radv_entrypoints_gen.py
index a6e832a0ab..731ade218f 100644
--- a/src/amd/vulkan/radv_entrypoints_gen.py
+++ b/src/amd/vulkan/radv_entrypoints_gen.py
@@ -23,13 +23,7 @@
 #
 
 import fileinput, re, sys
-
-# Each function typedef in the vulkan.h header is all on one line and matches
-# this regepx. We hope that won't change.
-
-p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);')
-
-entrypoints = []
+import xml.etree.ElementTree as ET
 
 # We generate a static hash table for entry point lookup
 # (vkGetProcAddress). We use a linear congruential generator for our hash
@@ -51,29 +45,11 @@ def hash(name):
 
 return h
 
-def get_platform_guard_macro(name):
-if "Xlib" in name:
-return "VK_USE_PLATFORM_XLIB_KHR"
-elif "Xcb" in name:
-return "VK_USE_PLATFORM_XCB_KHR"
-elif "Wayland" in name:
-return "VK_USE_PLATFORM_WAYLAND_KHR"
-elif "Mir" in name:
-return "VK_USE_PLATFORM_MIR_KHR"
-elif "Android" in name:
-return "VK_USE_PLATFORM_ANDROID_KHR"
-elif "Win32" in name:
-return "VK_USE_PLATFORM_WIN32_KHR"
-else:
-return None
-
-def print_guard_start(name):
-guard = get_platform_guard_macro(name)
+def print_guard_start(guard):
 if guard is not None:
 print "#ifdef {0}".format(guard)
 
-def print_guard_end(name):
-guard = get_platform_guard_macro(name)
+def print_guard_end(guard):
 if guard is not None:
 print "#endif // {0}".format(guard)
 
@@ -87,18 +63,38 @@ elif (sys.argv[1] == "code"):
 opt_code = True
 sys.argv.pop()
 
-# Parse the entry points in the header
-
-i = 0
-for line in fileinput.input():
-m  = p.match(line)
-if (m):
-if m.group(2) == 'VoidFunction':
-continue
-fullname = "vk" + m.group(2)
-h = hash(fullname)
-entrypoints.append((m.group(1), m.group(2), m.group(3), i, h))
-i = i + 1
+# Extract the entry points from the registry
+def get_entrypoints(doc, entrypoints_to_defines):
+i = 0
+entrypoints = []
+commands = doc.findall('./commands/command')
+for command in commands:
+type = command.find('./proto/type').text
+name = command.find('./proto/name').text[2:]
+params = map(lambda p: "".join(p.itertext()), 
command.findall('./param'))
+params = ', '.join(params)
+if name in entrypoints_to_defines:
+guard = entrypoints_to_defines[name]
+else:
+guard = None
+entrypoints.append((type, name, params, i, hash(name), guard))
+i += 1
+return entrypoints
+
+# Maps entry points to extension defines
+def get_entrypoints_defines(doc):
+entrypoints_to_defines = {}
+extensions = doc.findall('./extensions/extension')
+for extension in extensions:
+define = extension.get('protect')
+entrypoints = extension.findall('./require/command')
+for entrypoint in entrypoints:
+name = entrypoint.get('name')[2:]
+entrypoints_to_defines[name] = define
+return entrypoints_to_defines
+
+doc = ET.parse(sys.stdin)
+entrypoints = get_entrypoints(doc, get_entrypoints_defines(doc))
 
 # For outputting entrypoints.h we generate a radv_EntryPoint() prototype
 # per entry point.
@@ -111,8 +107,7 @@ if opt_header:
   

[Mesa-dev] [PATCH 1/4] include: update Vulkan headers

2017-01-12 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 include/vulkan/vk_platform.h |   2 +-
 include/vulkan/vulkan.h  | 247 ++-
 2 files changed, 244 insertions(+), 5 deletions(-)

diff --git a/include/vulkan/vk_platform.h b/include/vulkan/vk_platform.h
index 0fa62ee23d..72f80493ca 100644
--- a/include/vulkan/vk_platform.h
+++ b/include/vulkan/vk_platform.h
@@ -2,7 +2,7 @@
 // File: vk_platform.h
 //
 /*
-** Copyright (c) 2014-2015 The Khronos Group Inc.
+** Copyright (c) 2014-2017 The Khronos Group Inc.
 **
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h
index 694527c3bb..f24a0a2caf 100644
--- a/include/vulkan/vulkan.h
+++ b/include/vulkan/vulkan.h
@@ -43,7 +43,7 @@ extern "C" {
 #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
 #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
 // Version of this file
-#define VK_HEADER_VERSION 32
+#define VK_HEADER_VERSION 38
 
 
 #define VK_NULL_HANDLE 0
@@ -226,6 +226,12 @@ typedef enum VkStructureType {
 VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_NV = 157001,
 VK_STRUCTURE_TYPE_WIN32_KEYED_MUTEX_ACQUIRE_RELEASE_INFO_NV = 158000,
 VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT = 161000,
+VK_STRUCTURE_TYPE_OBJECT_TABLE_CREATE_INFO_NVX = 186000,
+VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NVX = 186001,
+VK_STRUCTURE_TYPE_CMD_PROCESS_COMMANDS_INFO_NVX = 186002,
+VK_STRUCTURE_TYPE_CMD_RESERVE_SPACE_FOR_COMMANDS_INFO_NVX = 186003,
+VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_LIMITS_NVX = 186004,
+VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_FEATURES_NVX = 186005,
 VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO,
 VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO,
 VK_STRUCTURE_TYPE_RANGE_SIZE = 
(VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - 
VK_STRUCTURE_TYPE_APPLICATION_INFO + 1),
@@ -918,6 +924,7 @@ typedef enum VkPipelineStageFlagBits {
 VK_PIPELINE_STAGE_HOST_BIT = 0x4000,
 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT = 0x8000,
 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT = 0x0001,
+VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX = 0x0002,
 VK_PIPELINE_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFF
 } VkPipelineStageFlagBits;
 typedef VkFlags VkPipelineStageFlags;
@@ -1092,6 +1099,8 @@ typedef enum VkAccessFlagBits {
 VK_ACCESS_HOST_WRITE_BIT = 0x4000,
 VK_ACCESS_MEMORY_READ_BIT = 0x8000,
 VK_ACCESS_MEMORY_WRITE_BIT = 0x0001,
+VK_ACCESS_COMMAND_PROCESS_READ_BIT_NVX = 0x0002,
+VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX = 0x0004,
 VK_ACCESS_FLAG_BITS_MAX_ENUM = 0x7FFF
 } VkAccessFlagBits;
 typedef VkFlags VkAccessFlags;
@@ -3735,7 +3744,7 @@ VKAPI_ATTR VkBool32 VKAPI_CALL 
vkGetPhysicalDeviceWin32PresentationSupportKHR(
 #define VK_EXT_debug_report 1
 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT)
 
-#define VK_EXT_DEBUG_REPORT_SPEC_VERSION  3
+#define VK_EXT_DEBUG_REPORT_SPEC_VERSION  4
 #define VK_EXT_DEBUG_REPORT_EXTENSION_NAME "VK_EXT_debug_report"
 #define VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT 
VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT
 
@@ -3770,9 +3779,13 @@ typedef enum VkDebugReportObjectTypeEXT {
 VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT = 26,
 VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT = 27,
 VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT = 28,
+VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_KHR_EXT = 29,
+VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT = 30,
+VK_DEBUG_REPORT_OBJECT_TYPE_OBJECT_TABLE_NVX_EXT = 31,
+VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT = 32,
 VK_DEBUG_REPORT_OBJECT_TYPE_BEGIN_RANGE_EXT = 
VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,
-VK_DEBUG_REPORT_OBJECT_TYPE_END_RANGE_EXT = 
VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT,
-VK_DEBUG_REPORT_OBJECT_TYPE_RANGE_SIZE_EXT = 
(VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT - 
VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT + 1),
+VK_DEBUG_REPORT_OBJECT_TYPE_END_RANGE_EXT = 
VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT,
+VK_DEBUG_REPORT_OBJECT_TYPE_RANGE_SIZE_EXT = 
(VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT - 
VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT + 1),
 VK_DEBUG_REPORT_OBJECT_TYPE_MAX_ENUM_EXT = 0x7FFF
 } VkDebugReportObjectTypeEXT;
 
@@ -4154,6 +4167,232 @@ typedef struct VkValidationFlagsEXT {
 
 
 
+#define VK_NVX_device_generated_commands 1
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkObjectTableNVX)
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutNVX)
+
+#define VK_NVX_DEVICE_GENERATED_COMMANDS_SPEC_VERSION 1
+#define VK_NVX_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME 
"VK_NVX_device_generated_commands"
+
+
+typedef enum 

[Mesa-dev] [PATCH 0/4] Vulkan: generate entry points from vk.xml

2017-01-12 Thread Lionel Landwerlin
Hi,

We're currently generating a few vulkan entry point files by parsing
the API files using regular expressions. This is limited for a couple
of reasons :
- function pointer types might not always be declared on a single
  line
- if function pointers are declared under conditional flags we
  have no way to tell

We can instead use the vk.xml file provided by Khronos [1]. This patch
implements this and also updates headers to latest available versions
(1.0.38).

Cheers,

Lionel

[1] : https://github.com/KhronosGroup/Vulkan-Docs/blob/1.0/src/spec/vk.xml
[2] : https://github.com/djdeath/mesa/tree/vk-xml

Lionel Landwerlin (4):
  include: update Vulkan headers
  vulkan: add API registry
  anv: generate entry points from vk.xml
  radv: generate entrypoints from vk.xml

 include/vulkan/vk_platform.h|2 +-
 include/vulkan/vulkan.h |  247 +-
 src/Makefile.am |4 +-
 src/amd/vulkan/Makefile.am  |   14 +-
 src/amd/vulkan/radv_entrypoints_gen.py  |  105 +-
 src/intel/vulkan/Makefile.am|   15 +-
 src/intel/vulkan/anv_entrypoints_gen.py |  125 +-
 src/vulkan/registry/vk.xml  | 4903 +++
 8 files changed, 5277 insertions(+), 138 deletions(-)
 create mode 100644 src/vulkan/registry/vk.xml

--
2.11.0
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir: add flt comparison simplification

2017-01-12 Thread Timothy Arceri
Didn't turn out as useful as I'd hoped, but it will help a lot more on
i965 by reducing regressions when we drop brw_do_channel_expressions()
and brw_do_vector_splitting().

I'm not sure how much sense 'is_not_used_by_conditional' makes on
platforms other than i965 but since this is a new opt it at least
won't do any harm.

shader-db BDW:

total instructions in shared programs: 13060410 -> 13060242 (-0.00%)
instructions in affected programs: 44880 -> 44712 (-0.37%)
helped: 86
HURT: 0

total cycles in shared programs: 256585692 -> 256584248 (-0.00%)
cycles in affected programs: 8060766 -> 8059322 (-0.02%)
helped: 177
HURT: 33
---
 src/compiler/nir/nir_opt_algebraic.py |  4 
 src/compiler/nir/nir_search_helpers.h | 15 +++
 2 files changed, 19 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 3c8318c..59f9e43 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -159,6 +159,10 @@ optimizations = [
# a != 0.0
(('flt', 0.0, ('fabs', a)), ('fne', a, 0.0)),
 
+   # ignore this opt when the result is used by a bcsel or if so we can make
+   # use of conditional modifiers on supported hardware.
+   (('flt(is_not_used_by_conditional)', ('fadd(is_used_once)', a, ('fneg', 
b)), 0.0), ('flt', a, b)),
+
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
(('bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
diff --git a/src/compiler/nir/nir_search_helpers.h 
b/src/compiler/nir/nir_search_helpers.h
index ddaff52..05bd317 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -158,4 +158,19 @@ is_not_used_by_if(nir_alu_instr *instr)
return list_empty(&instr->dest.dest.ssa.if_uses);
 }
 
+static inline bool
+is_not_used_by_conditional(nir_alu_instr *instr)
+{
+   if (!is_not_used_by_if(instr))
+  return false;
+
+   nir_foreach_use(use, &instr->dest.dest.ssa) {
+  if (use->parent_instr->type == nir_instr_type_alu &&
+  nir_instr_as_alu(use->parent_instr)->op == nir_op_bcsel)
+ return false;
+   }
+
+   return true;
+}
+
 #endif /* _NIR_SEARCH_ */
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Roland Scheidegger
Am 12.01.2017 um 22:03 schrieb Matteo Bruni:
> 2017-01-11 19:09 GMT+01:00 Jason Ekstrand :
>> Another reason why I'm not a huge fan is that there is some momentum in the
>> industry to make GLSL better defined with respect to NaN.  I don't know that
>> anything will ever come of it (because it may break apps) but if something
>> does, we may find ourselves having to make SQRT and RSQ NaN-correct in the
>> future and, hey look, it'll break apps.
> 
> Hijacking the thread to focus on this. What's the interest about
> specifically (assuming that's something that can be talked about
> publicly?)
> 
> I'm asking because in Wine we have been bitten many times by the GLSL
> behavior WRT NaN and at the moment we really can't do anything
> sensible about it in the general case. Many d3d9 applications depend
> on not generating NaN in cases where e.g. IEEE 754 would expect to.
> The RSQ (i.e. inversesqrt()) from the original thread is one of those,
> but that's not really a problem for sane applications since it's
> "defined" in d3d8/d3d9 as taking the absolute value of the operand and
> indeed we translate RSQ to inversesqrt(abs(x)). The real issue is with
> cases like 0*inf, which AFAIK on d3d9 are supposed to give 0 as
> result.
> 
> Of course, that changes with d3d10+, which generally requires
> generating and preserving NaN, as Roland already mentioned.
> 
> So, what would be really nice to have is a GLSL extension for some
> kind of switch to select the requested behavior WRT NaN. For example a
> three-way option with "don't generate NaN in arithmetic operations",
> "do generate NaN" and "don't care". It could also be a GL state if
> that's easier to implement with the existing hardware, since an
> individual application isn't supposed to require different behavior
> from one shader to the next.
> 
> Is anyone interested in / favorable to something like this? It would
> solve the issue with defining NaN behavior in GLSL while making things
> a bit more compatible with "other API a lot of games are ported from
> which happens to be supported by all the desktop GPUs".
> 
Is there actually a formal requirement that d3d9 hw never generates
NaNs? I think d3d9 is very lacking in spec there - if that is specified
somewhere I've never seen it... Maybe just everybody is expecting no
NaNs there too (because earlier hw couldn't do it). I think some hw
couldn't do infinity either.

In any case, being able to select NaN behavior looks potentially quite
useful to me.

Roland


> Cheers,
> Matteo Bruni.
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97102] [dri][swr] stack overflow / infinite loop with GALLIUM_DRIVER=swr

2017-01-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97102

--- Comment #11 from Bruce Cherniak  ---
As Tim suggests, pruning empty nodes is probably the best solution for the
crash.

For performance, however, I'm not sure how many cores to expose in your case. 
cpuinfo shows that there are 4 threads across 2 cores, which we detect as 2
cores, with 2 hyperthreads.  Due to the way OpenSWR loads the processor, we
have found that not using the hyperthreads as OpenSWR workers yields the best
performance.  This may or may not be the case with your processor.

Something you can try is to set the environment variable
KNOB_MAX_THREADS_PER_CORE=0.  This will allow OpenSWR to use all 4 threads.

Please report back on how this affects performance.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/32] gbm: Introduce modifiers into surface/bo creation

2017-01-12 Thread Ben Widawsky

On 17-01-09 17:03:48, Jason Ekstrand wrote:

On Mon, Jan 2, 2017 at 6:37 PM, Ben Widawsky  wrote:


The idea behind modifiers like this is that the user of GBM will have
some mechanism to query what properties the hardware supports for its BO
or surface. This information is directly passed in (and stored) so that
the DRI implementation can create an image with the appropriate
attributes.

A getter() will be added later so that the user of GBM will be able to
query what modifier should be used.

I've opted to store all modifiers passed in during creation and to make
the determination happen at actual creation time for no reason other
than it seems more flexible.

v2: Make sure to check if count is non-zero in addition to testing if
calloc fails. (Daniel)

v3: Remove "usage" and "flags" from modifier creation. Requested by
Kristian.

Cc: Kristian Høgsberg 
Cc: Daniel Stone 
Signed-off-by: Ben Widawsky 
Reviewed-by: Eric Engestrom 
Acked-by: Daniel Stone 
---
 src/egl/drivers/dri2/platform_drm.c | 19 +
 src/gbm/backends/dri/gbm_dri.c  | 41 ++
+--
 src/gbm/gbm-symbols-check   |  2 ++
 src/gbm/main/gbm.c  | 28 +++--
 src/gbm/main/gbm.h  | 12 +++
 src/gbm/main/gbmint.h   | 16 +--
 6 files changed, 104 insertions(+), 14 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/
platform_drm.c
index 20993147c8..86247ecaf3 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -228,10 +228,21 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)

if (dri2_surf->back == NULL)
   return -1;
-   if (dri2_surf->back->bo == NULL)
-  dri2_surf->back->bo = gbm_bo_create(&dri2_dpy->gbm_dri->base.base,
- surf->base.width,
surf->base.height,
- surf->base.format,
surf->base.flags);
+   if (dri2_surf->back->bo == NULL) {
+  if (surf->base.modifiers)
+ dri2_surf->back->bo = gbm_bo_create_with_modifiers(&
dri2_dpy->gbm_dri->base.base,
+
surf->base.width, surf->base.height,
+
surf->base.format,
+
surf->base.modifiers,
+
surf->base.count);
+  else
+ dri2_surf->back->bo = gbm_bo_create(&dri2_dpy->gbm_
dri->base.base,
+ surf->base.width,
+ surf->base.height,
+ surf->base.format,
+ surf->base.flags);
+
+   }
if (dri2_surf->back->bo == NULL)
   return -1;

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_
dri.c
index 20bbf27cc3..f0e67b081e 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -958,13 +958,21 @@ free_bo:
 static struct gbm_bo *
 gbm_dri_bo_create(struct gbm_device *gbm,
   uint32_t width, uint32_t height,
-  uint32_t format, uint32_t usage)
+  uint32_t format, uint32_t usage,
+  const uint64_t *modifiers,
+  const unsigned int count)
 {
struct gbm_dri_device *dri = gbm_dri_device(gbm);
struct gbm_dri_bo *bo;
int dri_format;
unsigned dri_use = 0;

+   /* Callers of this may specify a modifier, or a dri usage, but not
both. The
+* newer modifier interface deprecates the older usage flags. This is
the
+* equivalent of usage NAND count.
+*/
+   assert(~(usage & count));



Again, I don't think this is what you want.  You want "assert(!(usage &&
count));"



Yeah.




+
if (usage & GBM_BO_USE_WRITE || dri->image == NULL)
   return create_dumb(gbm, width, height, format, usage);

@@ -1023,13 +1031,23 @@ gbm_dri_bo_create(struct gbm_device *gbm,
dri_use |= __DRI_IMAGE_USE_SHARE;

bo->image =
-  dri->image->createImage(dri->screen,
-  width, height,
-  dri_format, dri_use,
-  bo);
+  dri->image->createImageWithModifiers(dri->screen,
+   width, height,
+   dri_format,
+   modifiers, count,
+   bo);



Do we want to handle the case where your DRI is too old to have
createImageWithModifiers?



Yes.

Thanks.




if (bo->image == NULL)
   goto failed;

+   bo->base.base.modifiers = calloc(count, sizeof(*modifiers));


+   if (count && !bo->base.base.modifiers) {

+  dri->image->destroyImage(bo->image);
+  goto failed;
+   }
+
+   bo->base.base.count = count;
+   memcpy(bo->base.base.modifiers, modifiers, count *
sizeof(*modifiers));
+
dri->image->queryImage(bo->image, 

[Mesa-dev] NaN behavior in GLSL (was Re: [PATCH] glsl: always do sqrt(abs()) and inversesqrt(abs()))

2017-01-12 Thread Matteo Bruni
2017-01-11 19:09 GMT+01:00 Jason Ekstrand :
> Another reason why I'm not a huge fan is that there is some momentum in the
> industry to make GLSL better defined with respect to NaN.  I don't know that
> anything will ever come of it (because it may break apps) but if something
> does, we may find ourselves having to make SQRT and RSQ NaN-correct in the
> future and, hey look, it'll break apps.

Hijacking the thread to focus on this. What's the interest about
specifically (assuming that's something that can be talked about
publicly?)

I'm asking because in Wine we have been bitten many times by the GLSL
behavior WRT NaN and at the moment we really can't do anything
sensible about it in the general case. Many d3d9 applications depend
on not generating NaN in cases where e.g. IEEE 754 would expect to.
The RSQ (i.e. inversesqrt()) from the original thread is one of those,
but that's not really a problem for sane applications since it's
"defined" in d3d8/d3d9 as taking the absolute value of the operand and
indeed we translate RSQ to inversesqrt(abs(x)). The real issue is with
cases like 0*inf, which AFAIK on d3d9 are supposed to give 0 as
result.

Of course, that changes with d3d10+, which generally requires to
generate and preserve NaN, as Roland already mentioned.

So, what would be really nice to have is a GLSL extension for some
kind of switch to select the requested behavior WRT NaN. For example a
three-way option with "don't generate NaN in arithmetic operations",
"do generate NaN" and "don't care". It could also be a GL state if
that's easier to implement with the existing hardware, since an
individual application isn't supposed to require different behavior
from one shader to the next.

Is anyone interested in / favorable to something like this? It would
solve the issue with defining NaN behavior in GLSL while making things
a bit more compatible with "other API a lot of games are ported from
which happens to be supported by all the desktop GPUs".


Cheers,
Matteo Bruni.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] util: Implement a hash table cloning function

2017-01-12 Thread Thomas Helland
V2: Don't rzalloc; we are about to rewrite the whole thing (Vladislav)
---
 src/util/hash_table.c | 22 ++
 src/util/hash_table.h |  2 ++
 2 files changed, 24 insertions(+)

diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index 9e643af8b2..603f694ccb 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -140,6 +140,28 @@ _mesa_hash_table_create(void *mem_ctx,
return ht;
 }
 
+struct hash_table *
+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx)
+{
+   struct hash_table *ht;
+
+   ht = ralloc(dst_mem_ctx, struct hash_table);
+   if (ht == NULL)
+  return NULL;
+
+   memcpy(ht, src, sizeof(struct hash_table));
+
+   ht->table = ralloc_array(ht, struct hash_entry, ht->size);
+   if (ht->table == NULL) {
+  ralloc_free(ht);
+  return NULL;
+   }
+
+   memcpy(ht->table, src->table, ht->size * sizeof(struct hash_entry));
+
+   return ht;
+}
+
 /**
  * Frees the given hash table.
  *
diff --git a/src/util/hash_table.h b/src/util/hash_table.h
index b35ee871bb..971b04aff2 100644
--- a/src/util/hash_table.h
+++ b/src/util/hash_table.h
@@ -62,6 +62,8 @@ _mesa_hash_table_create(void *mem_ctx,
 uint32_t (*key_hash_function)(const void *key),
 bool (*key_equals_function)(const void *a,
 const void *b));
+struct hash_table *
+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx);
 void _mesa_hash_table_destroy(struct hash_table *ht,
   void (*delete_function)(struct hash_entry 
*entry));
 void _mesa_hash_table_clear(struct hash_table *ht,
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: Use hash table cloning in copy propagation

2017-01-12 Thread Thomas Helland
Walking the whole hash table and re-inserting every entry, hashing each
key again along the way, is a really, really bad idea. We can simply
memcpy the whole thing.
---
 src/compiler/glsl/opt_copy_propagation.cpp | 17 -
 .../glsl/opt_copy_propagation_elements.cpp | 29 --
 2 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/src/compiler/glsl/opt_copy_propagation.cpp 
b/src/compiler/glsl/opt_copy_propagation.cpp
index 247c4988ed..4fd4aa8d82 100644
--- a/src/compiler/glsl/opt_copy_propagation.cpp
+++ b/src/compiler/glsl/opt_copy_propagation.cpp
@@ -202,16 +202,11 @@ ir_copy_propagation_visitor::handle_if_block(exec_list 
*instructions)
exec_list *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
-   acp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
 
/* Populate the initial acp with a copy of the original */
-   struct hash_entry *entry;
-   hash_table_foreach(orig_acp, entry) {
-  _mesa_hash_table_insert(acp, entry->key, entry->data);
-   }
+   acp = _mesa_hash_table_clone(orig_acp, NULL);
 
visit_list_elements(this, instructions);
 
@@ -251,16 +246,14 @@ ir_copy_propagation_visitor::handle_loop(ir_loop *ir, 
bool keep_acp)
exec_list *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
-   acp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
 
if (keep_acp) {
-  struct hash_entry *entry;
-  hash_table_foreach(orig_acp, entry) {
- _mesa_hash_table_insert(acp, entry->key, entry->data);
-  }
+  acp = _mesa_hash_table_clone(orig_acp, NULL);
+   } else {
+  acp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+_mesa_key_pointer_equal);
}
 
visit_list_elements(this, &ir->body_instructions);
diff --git a/src/compiler/glsl/opt_copy_propagation_elements.cpp 
b/src/compiler/glsl/opt_copy_propagation_elements.cpp
index 9f79fa9202..8bae424a1d 100644
--- a/src/compiler/glsl/opt_copy_propagation_elements.cpp
+++ b/src/compiler/glsl/opt_copy_propagation_elements.cpp
@@ -124,6 +124,12 @@ public:
   ralloc_free(mem_ctx);
}
 
+   void clone_acp(hash_table *lhs, hash_table *rhs)
+   {
+  lhs_ht = _mesa_hash_table_clone(lhs, mem_ctx);
+  rhs_ht = _mesa_hash_table_clone(rhs, mem_ctx);
+   }
+
void create_acp()
{
   lhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
@@ -138,19 +144,6 @@ public:
   _mesa_hash_table_destroy(rhs_ht, NULL);
}
 
-   void populate_acp(hash_table *lhs, hash_table *rhs)
-   {
-  struct hash_entry *entry;
-
-  hash_table_foreach(lhs, entry) {
- _mesa_hash_table_insert(lhs_ht, entry->key, entry->data);
-  }
-
-  hash_table_foreach(rhs, entry) {
- _mesa_hash_table_insert(rhs_ht, entry->key, entry->data);
-  }
-   }
-
void handle_loop(ir_loop *, bool keep_acp);
virtual ir_visitor_status visit_enter(class ir_loop *);
virtual ir_visitor_status visit_enter(class ir_function_signature *);
@@ -395,10 +388,8 @@ 
ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
 
-   create_acp();
-
/* Populate the initial acp with a copy of the original */
-   populate_acp(orig_lhs_ht, orig_rhs_ht);
+   clone_acp(orig_lhs_ht, orig_rhs_ht);
 
visit_list_elements(this, instructions);
 
@@ -454,11 +445,11 @@ ir_copy_propagation_elements_visitor::handle_loop(ir_loop 
*ir, bool keep_acp)
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
 
-   create_acp();
-
if (keep_acp) {
   /* Populate the initial acp with a copy of the original */
-  populate_acp(orig_lhs_ht, orig_rhs_ht);
+  clone_acp(orig_lhs_ht, orig_rhs_ht);
+   } else {
+  create_acp();
}
 
visit_list_elements(this, &ir->body_instructions);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 6/9] radeonsi: remove unused si_prepare_cube_coords

2017-01-12 Thread Marek Olšák
I commented on patches 1 & 2. Other than that, patches 1-6 are:

Reviewed-by: Marek Olšák 

Marek

On Thu, Jan 12, 2017 at 4:39 PM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> Reviewed-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeonsi/si_shader_internal.h |   4 -
>  src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 196 
> --
>  2 files changed, 200 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
> b/src/gallium/drivers/radeonsi/si_shader_internal.h
> index 6b3ac17..51d491e 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_internal.h
> +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
> @@ -208,16 +208,12 @@ LLVMValueRef si_llvm_emit_fetch(struct 
> lp_build_tgsi_context *bld_base,
> const struct tgsi_full_src_register *reg,
> enum tgsi_opcode_type type,
> unsigned swizzle);
>
>  void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
> const struct tgsi_full_instruction *inst,
> const struct tgsi_opcode_info *info,
> LLVMValueRef dst[4]);
>
>  void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base);
> -void si_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
> -   struct lp_build_emit_data *emit_data,
> -   LLVMValueRef *coords_arg,
> -   LLVMValueRef *derivs_arg);
>
>  #endif
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 
> b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> index 11bd45d..085fd5b 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> @@ -899,206 +899,10 @@ void si_shader_context_init_alu(struct 
> lp_build_tgsi_context *bld_base)
> bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
> bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
> bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
> bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
>
> bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
> bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
> bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
> bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
>  }
> -
> -/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
> - * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
> - * already multiplied by two. id is the cube face number.
> - */
> -struct cube_selection_coords {
> -   LLVMValueRef stc[2];
> -   LLVMValueRef ma;
> -   LLVMValueRef id;
> -};
> -
> -static void build_cube_intrinsic(struct gallivm_state *gallivm,
> -LLVMValueRef in[3],
> -struct cube_selection_coords *out)
> -{
> -   LLVMBuilderRef builder = gallivm->builder;
> -
> -   if (HAVE_LLVM >= 0x0309) {
> -   LLVMTypeRef f32 = LLVMTypeOf(in[0]);
> -
> -   out->stc[1] = lp_build_intrinsic(builder, 
> "llvm.amdgcn.cubetc",
> -   f32, in, 3, 
> LP_FUNC_ATTR_READNONE);
> -   out->stc[0] = lp_build_intrinsic(builder, 
> "llvm.amdgcn.cubesc",
> -   f32, in, 3, 
> LP_FUNC_ATTR_READNONE);
> -   out->ma = lp_build_intrinsic(builder, "llvm.amdgcn.cubema",
> -   f32, in, 3, 
> LP_FUNC_ATTR_READNONE);
> -   out->id = lp_build_intrinsic(builder, "llvm.amdgcn.cubeid",
> -   f32, in, 3, 
> LP_FUNC_ATTR_READNONE);
> -   } else {
> -   LLVMValueRef c[4] = {
> -   in[0],
> -   in[1],
> -   in[2],
> -   LLVMGetUndef(LLVMTypeOf(in[0]))
> -   };
> -   LLVMValueRef vec = lp_build_gather_values(gallivm, c, 4);
> -
> -   LLVMValueRef tmp =
> -   lp_build_intrinsic(builder, "llvm.AMDGPU.cube",
> - LLVMTypeOf(vec), &vec, 1,
> - LP_FUNC_ATTR_READNONE);
> -
> -   out->stc[1] = LLVMBuildExtractElement(builder, tmp,
> -   lp_build_const_int32(gallivm, 0), "");
> -   out->stc[0] = LLVMBuildExtractElement(builder, tmp,
> -   lp_build_const_int32(gallivm, 1), "");
> -   out->ma = LLVMBuildExtractElement(builder, tmp,
> -   lp_build_const_int32(gallivm, 2), "");
> -   out->id = LLVMBuildExtractElement(builder, tmp,

Re: [Mesa-dev] [PATCH v2 2/9] radeonsi: restrict cube map derivative computations to the correct plane

2017-01-12 Thread Marek Olšák
On Thu, Jan 12, 2017 at 4:39 PM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> As remarked by the comment in the original code, the old algorithm fails when
> (tc + deriv) points at a different cube face. Instead, simply project the
> derivative directly to the plane of the selected cube face.
>
> The new code is based on exactly differentiating (using the chain rule)
> the projection onto a plane corresponding to a fixed cube map face (which
> is still selected in the usual way based on the texture coordinate itself).
> The computations end up fairly involved, but we do save two reciprocal
> computations.
>
> Fixes GL45-CTS.texture_cube_map_array.sampling.
>
> v2: add 0.5 offset to tex coords only after derivative calculation
> v3: go back to 1.5 offset
>
> Reviewed-by: Bas Nieuwenhuizen  (v2)
> ---
>  src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 130 
> ++
>  1 file changed, 107 insertions(+), 23 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 
> b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> index 9b8c9b0..60346cb 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> @@ -952,20 +952,76 @@ static void build_cube_intrinsic(struct gallivm_state 
> *gallivm,
> lp_build_const_int32(gallivm, 0), "");
> out->stc[0] = LLVMBuildExtractElement(builder, tmp,
> lp_build_const_int32(gallivm, 1), "");
> out->ma = LLVMBuildExtractElement(builder, tmp,
> lp_build_const_int32(gallivm, 2), "");
> out->id = LLVMBuildExtractElement(builder, tmp,
> lp_build_const_int32(gallivm, 3), "");
> }
>  }
>
> +/**
> + * Build a manual selection sequence for cube face sc/tc coordinates and
> + * major axis vector (multiplied by 2 for consistency) for the given
> + * vec3 \p coords, for the face implied by \p selcoords.
> + *
> + * For the major axis, we always adjust the sign to be in the direction of
> + * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
> + * the selcoords major axis.
> + */
> +static void build_cube_select(LLVMBuilderRef builder,
> + const struct cube_selection_coords *selcoords,
> + const LLVMValueRef *coords,
> + LLVMValueRef *out_st,
> + LLVMValueRef *out_ma)
> +{
> +   LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
> +   LLVMValueRef is_ma_positive;
> +   LLVMValueRef sgn_ma;
> +   LLVMValueRef is_ma_z, is_not_ma_z;
> +   LLVMValueRef is_ma_y;
> +   LLVMValueRef is_ma_x;
> +   LLVMValueRef sgn;
> +   LLVMValueRef tmp;
> +
> +   is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
> +   selcoords->ma, LLVMConstReal(f32, 0.0), "");
> +   sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
> +   LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
> +
> +   is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, 
> LLVMConstReal(f32, 4.0), "");
> +   is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
> +   is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
> +   LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, 
> LLVMConstReal(f32, 2.0), ""), "");
> +   is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, 
> is_ma_y, ""), "");
> +
> +   /* Select sc */
> +   tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
> +   sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
> +   LLVMBuildSelect(builder, is_ma_x, sgn_ma,
> +   LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
> +   out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
> +
> +   /* Select tc */
> +   tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
> +   sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, 
> sgn_ma, ""),
> +   LLVMConstReal(f32, -1.0), "");
> +   out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
> +
> +   /* Select ma */
> +   tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
> +   LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), 
> "");
> +   sgn = LLVMBuildSelect(builder, is_ma_positive,
> +   LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
> +   *out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
> +}
> +
>  static void si_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
>   LLVMValueRef *in, LLVMValueRef *out)
>  {
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> LLVMTypeRef type = 

Re: [Mesa-dev] [PATCH] nir/gcm: Rework the schedule late loop

2017-01-12 Thread Matt Turner
On Thu, Dec 1, 2016 at 1:51 PM, Jason Ekstrand  wrote:
> This fixes a bug in code motion that occurred when the best block is the
> same as the schedule early block.  In this case, because we're checking
> (lca != def->parent_instr->block) at the top of the loop, we never get to
> the check for loop depth so we wouldn't move it out of the loop.  This
> commit reworks the loop to be a simple for loop up the dominator chain and
> we place the (lca != def->parent_instr->block) check at the end of the
> loop.
> ---
>  src/compiler/nir/nir_opt_gcm.c | 11 ++-
>  1 file changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c
> index 77eb8e6..9d7f59c 100644
> --- a/src/compiler/nir/nir_opt_gcm.c
> +++ b/src/compiler/nir/nir_opt_gcm.c
> @@ -326,12 +326,13 @@ gcm_schedule_late_def(nir_ssa_def *def, void 
> *void_state)
>  * as far outside loops as we can get.

In this comment, the first line says "know" instead of "now". Please
fix that while we're here.

>  */
> nir_block *best = lca;
> -   while (lca != def->parent_instr->block) {
> -  assert(lca);
> -  if (state->blocks[lca->index].loop_depth <
> +   for (nir_block *block = lca; block != NULL; block = block->imm_dom) {
> +  if (state->blocks[block->index].loop_depth <
>state->blocks[best->index].loop_depth)
> - best = lca;
> -  lca = lca->imm_dom;
> + best = block;
> +
> +  if (block == def->parent_instr->block)
> + break;
> }

I had to come up with an example and walk through it to understand the
issue, but I now understand and agree that this is the correct fix.

This patch is

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: replace si_shader_context::soa by bld_base

2017-01-12 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Thu, Jan 12, 2017 at 2:07 PM, Samuel Pitoiset
 wrote:
> We no longer need to use lp_build_tgsi_soa_context.
>
> No regressions found with a full piglit run.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/radeonsi/si_shader.c   | 112 
> ++---
>  src/gallium/drivers/radeonsi/si_shader_internal.h  |   2 +-
>  .../drivers/radeonsi/si_shader_tgsi_setup.c|  46 -
>  3 files changed, 78 insertions(+), 82 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 962c2f206c..c1672ef472 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -176,7 +176,7 @@ static LLVMValueRef unpack_param(struct si_shader_context 
> *ctx,
>   param);
>
> if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
> -   value = bitcast(&ctx->soa.bld_base,
> +   value = bitcast(&ctx->bld_base,
> TGSI_TYPE_UNSIGNED, value);
>
> if (rshift)
> @@ -251,7 +251,7 @@ get_tcs_out_patch_stride(struct si_shader_context *ctx)
>  static LLVMValueRef
>  get_tcs_out_patch0_offset(struct si_shader_context *ctx)
>  {
> -   return lp_build_mul_imm(&ctx->soa.bld_base.uint_bld,
> +   return lp_build_mul_imm(&ctx->bld_base.uint_bld,
> unpack_param(ctx,
>  SI_PARAM_TCS_OUT_OFFSETS,
>  0, 16),
> @@ -261,7 +261,7 @@ get_tcs_out_patch0_offset(struct si_shader_context *ctx)
>  static LLVMValueRef
>  get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
>  {
> -   return lp_build_mul_imm(&ctx->soa.bld_base.uint_bld,
> +   return lp_build_mul_imm(&ctx->bld_base.uint_bld,
> unpack_param(ctx,
>  SI_PARAM_TCS_OUT_OFFSETS,
>  16, 16),
> @@ -322,7 +322,7 @@ static void build_indexed_store(struct si_shader_context 
> *ctx,
> LLVMValueRef base_ptr, LLVMValueRef index,
> LLVMValueRef value)
>  {
> -   struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
> +   struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
> struct gallivm_state *gallivm = bld_base->base.gallivm;
>
> LLVMBuildStore(gallivm->builder, value,
> @@ -342,7 +342,7 @@ static LLVMValueRef build_indexed_load(struct 
> si_shader_context *ctx,
>LLVMValueRef base_ptr, LLVMValueRef 
> index,
>bool uniform)
>  {
> -   struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
> +   struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMValueRef pointer;
>
> @@ -370,8 +370,8 @@ static LLVMValueRef get_instance_index_for_fetch(
> unsigned param_start_instance, unsigned divisor)
>  {
> struct si_shader_context *ctx =
> -   si_shader_context(&radeon_bld->soa.bld_base);
> -   struct gallivm_state *gallivm = radeon_bld->soa.bld_base.base.gallivm;
> +   si_shader_context(&radeon_bld->bld_base);
> +   struct gallivm_state *gallivm = radeon_bld->bld_base.base.gallivm;
>
> LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn,
>ctx->param_instance_id);
> @@ -391,7 +391,7 @@ static void declare_input_vs(
> const struct tgsi_full_declaration *decl,
> LLVMValueRef out[4])
>  {
> -   struct lp_build_context *base = >soa.bld_base.base;
> +   struct lp_build_context *base = >bld_base.base;
> struct gallivm_state *gallivm = base->gallivm;
>
> unsigned chan;
> @@ -508,7 +508,7 @@ static LLVMValueRef get_indirect_index(struct 
> si_shader_context *ctx,
>const struct tgsi_ind_register *ind,
>int rel_index)
>  {
> -   struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
> +   struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
> LLVMValueRef result;
>
> result = ctx->addrs[ind->Index][ind->Swizzle];
> @@ -548,7 +548,7 @@ static LLVMValueRef get_dw_address(struct 
> si_shader_context *ctx,
>LLVMValueRef vertex_dw_stride,
>LLVMValueRef base_addr)
>  {
> -   struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
> +   struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
> struct tgsi_shader_info *info = &ctx->shader->selector->info;
> ubyte *name, *index, *array_first;
> int 

Re: [Mesa-dev] [PATCH v2 1/9] radeonsi: communicate cube map coordinates more explicitly

2017-01-12 Thread Marek Olšák
On Thu, Jan 12, 2017 at 4:39 PM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> Reviewed-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 74 
> +--
>  1 file changed, 42 insertions(+), 32 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 
> b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> index 0a49bc2..9b8c9b0 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> @@ -900,88 +900,98 @@ void si_shader_context_init_alu(struct 
> lp_build_tgsi_context *bld_base)
> bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
> bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
> bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
>
> bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
> bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
> bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
> bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
>  }
>
> -static LLVMValueRef build_cube_intrinsic(struct gallivm_state *gallivm,
> -LLVMValueRef in[3])
> +/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
> + * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
> + * already multiplied by two. id is the cube face number.
> + */
> +struct cube_selection_coords {
> +   LLVMValueRef stc[2];
> +   LLVMValueRef ma;
> +   LLVMValueRef id;
> +};
> +
> +static void build_cube_intrinsic(struct gallivm_state *gallivm,
> +LLVMValueRef in[3],
> +struct cube_selection_coords *out)
>  {
> +   LLVMBuilderRef builder = gallivm->builder;
> +
> if (HAVE_LLVM >= 0x0309) {
> LLVMTypeRef f32 = LLVMTypeOf(in[0]);
> -   LLVMValueRef out[4];
>
> -   out[0] = lp_build_intrinsic(gallivm->builder, 
> "llvm.amdgcn.cubetc",
> +   out->stc[1] = lp_build_intrinsic(builder, 
> "llvm.amdgcn.cubetc",
> f32, in, 3, 
> LP_FUNC_ATTR_READNONE);
> -   out[1] = lp_build_intrinsic(gallivm->builder, 
> "llvm.amdgcn.cubesc",
> +   out->stc[0] = lp_build_intrinsic(builder, 
> "llvm.amdgcn.cubesc",
> f32, in, 3, 
> LP_FUNC_ATTR_READNONE);
> -   out[2] = lp_build_intrinsic(gallivm->builder, 
> "llvm.amdgcn.cubema",
> +   out->ma = lp_build_intrinsic(builder, "llvm.amdgcn.cubema",
> f32, in, 3, 
> LP_FUNC_ATTR_READNONE);
> -   out[3] = lp_build_intrinsic(gallivm->builder, 
> "llvm.amdgcn.cubeid",
> +   out->id = lp_build_intrinsic(builder, "llvm.amdgcn.cubeid",
> f32, in, 3, 
> LP_FUNC_ATTR_READNONE);
> -
> -   return lp_build_gather_values(gallivm, out, 4);
> } else {
> LLVMValueRef c[4] = {
> in[0],
> in[1],
> in[2],
> LLVMGetUndef(LLVMTypeOf(in[0]))
> };
> LLVMValueRef vec = lp_build_gather_values(gallivm, c, 4);
>
> -   return lp_build_intrinsic(gallivm->builder, 
> "llvm.AMDGPU.cube",
> +   LLVMValueRef tmp =
> +   lp_build_intrinsic(builder, "llvm.AMDGPU.cube",
>   LLVMTypeOf(vec), &vec, 1,
>   LP_FUNC_ATTR_READNONE);
> +
> +   out->stc[1] = LLVMBuildExtractElement(builder, tmp,
> +   lp_build_const_int32(gallivm, 0), "");
> +   out->stc[0] = LLVMBuildExtractElement(builder, tmp,
> +   lp_build_const_int32(gallivm, 1), "");
> +   out->ma = LLVMBuildExtractElement(builder, tmp,
> +   lp_build_const_int32(gallivm, 2), "");
> +   out->id = LLVMBuildExtractElement(builder, tmp,
> +   lp_build_const_int32(gallivm, 3), "");
> }
>  }
>
>  static void si_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
>   LLVMValueRef *in, LLVMValueRef *out)
>  {
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> LLVMTypeRef type = bld_base->base.elem_type;
> -   LLVMValueRef coords[4];
> +   struct cube_selection_coords coords;
> +   LLVMValueRef invma;
> LLVMValueRef mad_args[3];
> -   LLVMValueRef v;
> -   unsigned i;
> -
> -   v = build_cube_intrinsic(gallivm, in);
>
> -   for (i = 0; 

Re: [Mesa-dev] [PATCH 3/4] util: Implement a hash table cloning function

2017-01-12 Thread Vladislav Egorov


12.01.2017 22:23, Thomas Helland пишет:

---
  src/util/hash_table.c | 22 ++
  src/util/hash_table.h |  2 ++
  2 files changed, 24 insertions(+)

diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index 9e643af8b2..702f465382 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -140,6 +140,28 @@ _mesa_hash_table_create(void *mem_ctx,
 return ht;
  }
  
+struct hash_table *

+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx)
+{
+   struct hash_table *ht;
+
+   ht = ralloc(dst_mem_ctx, struct hash_table);
+   if (ht == NULL)
+  return NULL;
+
+   memcpy(ht, src, sizeof(struct hash_table));
+
+   ht->table = rzalloc_array(ht, struct hash_entry, ht->size);

rzalloc is unnecessary here, ht->table is about to be overwritten.

+   if (ht->table == NULL) {
+  ralloc_free(ht);
+  return NULL;
+   }
+
+   memcpy(ht->table, src->table, ht->size * sizeof(struct hash_entry));
+
+   return ht;
+}
+
  /**
   * Frees the given hash table.
   *
diff --git a/src/util/hash_table.h b/src/util/hash_table.h
index b35ee871bb..971b04aff2 100644
--- a/src/util/hash_table.h
+++ b/src/util/hash_table.h
@@ -62,6 +62,8 @@ _mesa_hash_table_create(void *mem_ctx,
  uint32_t (*key_hash_function)(const void *key),
  bool (*key_equals_function)(const void *a,
  const void *b));
+struct hash_table *
+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx);
  void _mesa_hash_table_destroy(struct hash_table *ht,
void (*delete_function)(struct hash_entry 
*entry));
  void _mesa_hash_table_clear(struct hash_table *ht,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/32] gbm: Export a per plane getter for stride

2017-01-12 Thread Ben Widawsky

On 17-01-12 14:33:01, Daniel Stone wrote:

On 12 January 2017 at 14:32, Daniel Stone  wrote:

If allocated, this image is just leaked, along with its reference on
the BO. Same problem in gbm_dri_bo_get_handle_for_plane.


... and gbm_dri_bo_get_offset.

Cheers,
Daniel


Does this work (compile tested only)?

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 656fef3481..3c44c24d81 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -684,6 +684,9 @@ gbm_dri_bo_get_stride(struct gbm_bo *_bo, int plane)

   dri->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride);

+   if (image != bo->image)
+  dri->image->destroyImage(image);
+
   return (uint32_t)stride;
}

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 99388] RHEL 7.3, Matlab 2016a Segmentation fault - glEnable

2017-01-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=99388

Bug ID: 99388
   Summary: RHEL 7.3, Matlab 2016a Segmentation fault - glEnable
   Product: Mesa
   Version: 11.2
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: major
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: advor...@cs.princeton.edu
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 128918
  --> https://bugs.freedesktop.org/attachment.cgi?id=128918&action=edit
Failing code

Hello.

Environment:
RHEL 7.3
Matlab 2016a
Osmesa - 11.2.2

It works without any issues on an older version of RHEL (6) and with Matlab
2014a.

I'm attaching the code.

I'm getting a segmentation fault right before:

  glEnable(GL_DEPTH_TEST);
  glDisable(GL_LIGHTING);
  glEnable(GL_CULL_FACE);
  glCullFace(GL_BACK);

I have also compiled with mex -g, and the logs are below.

Thank you for your help,
Asya


>> [label,depth]=WarpMeshColor(P,outImsize(2),outImsize(1),XYZcamera);

MEX FILE: WarpMeshColor.mexa64 entry point located at address 0xb9396089
Add breakpoints at the debugger prompt and issue a "continue" to resume
execution of MATLAB.


Program received signal SIGUSR1, User defined signal 1.
0x7599f6d5 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
(gdb) continue
Continuing.
output size:
m_width=400
m_height=640
Step 1: setup off-screen binding
Step 2

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffd392e700 (LWP 11194)]
0x7fffb8bdf170 inc () from /lib64/libOSMesa.so.8
(gdb) continue
Continuing.
Attempt to restart MATLAB? [y or n]>>n

   Segmentation violation detected at Wed Jan  4 12:57:49 2017


Configuration:
  Crash Decoding  : Disabled
  Crash Mode  : continue (default)
  Current Graphics Driver: Unknown software
  Current Visual  : None
  Default Encoding: UTF-8
  GNU C Library   : 2.17 stable
  Host Name   : HOST
  MATLAB Architecture : glnxa64
  MATLAB Root : /usr/local/Matlab/R2016a
  MATLAB Version  : 9.0.0.341360 (R2016a)
  OpenGL  : software
  Operating System: Linux 3.10.0-514.2.2.el7.x86_64 #1 SMP Wed Dec 7
11:19:34 EST 2016 x86_64
  Processor ID: x86 Family 6 Model 37 Stepping 1, GenuineIntel
  Window System   : No active display

Fault Count: 1


Abnormal termination:
Segmentation violation

Register State (from fault):
  RAX = ff08  RBX = 0b71
  RCX = 75bab2e0  RDX = 0001
  RSP = 7fffd392abc0  RBP = 
  RSI = 0b71  RDI = 

   R8 = 7598fa00   R9 = 7fffcd576f70
  R10 = 7fffd392a970  R11 = 7fffb8bdf6f0
  R12 = 0001  R13 = 7fffccfbea80
  R14 = 7fffd392b4f8  R15 = 7fffd392b4f8

  RIP = 7fffb8bdf170  EFL = 00010246

   CS = 0033   FS =    GS = 

Stack Trace (from fault):
[  0] 0x7fffb8bdf170 
/lib64/libOSMesa.so.8+00553328
[  1] 0x7fffb939623d  WarpMeshColor.mexa64+4669
mexFunction+0436
[  2] 0x7fffe0f868b0
/usr/local/Matlab/R2016a/bin/glnxa64/libmex.so+00149680 mexRunMexFile+0064
[  3] 0x7fffe0f83214
/usr/local/Matlab/R2016a/bin/glnxa64/libmex.so+00135700
[  4] 0x7fffe0f83db4
/usr/local/Matlab/R2016a/bin/glnxa64/libmex.so+00138676
[  5] 0x7fffe1d2d505
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_dispatcher.so+00746757
_ZN8Mfh_file16dispatch_fh_implEMS_FviPP11mxArray_tagiS2_EiS2_iS2_+1509
[  6] 0x7fffe1d2d9a0
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_dispatcher.so+00747936
_ZN8Mfh_file11dispatch_fhEiPP11mxArray_tagiS2_+0032
[  7] 0x7fffded191c9
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+09232841
[  8] 0x7fffdee45dbf
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+10464703
[  9] 0x7fffdee3b8e6
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+10422502
[ 10] 0x7fffdee04891
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+10197137
[ 11] 0x7fffde8d2b2a
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+04750122
[ 12] 0x7fffde8d3a4c
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+04753996
[ 13] 0x7fffde8d1ebc
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+04746940
[ 14] 0x7fffde8cf9ea
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+04737514
[ 15] 0x7fffde8cfdb1
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+04738481
[ 16] 0x7fffde8d1a63
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+04745827
[ 17] 0x7fffde8d1be9
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+04746217
[ 18] 0x7fffde98216f
/usr/local/Matlab/R2016a/bin/glnxa64/libmwm_lxe.so+05468527
[ 19] 0x7fffde98522a

Re: [Mesa-dev] [PATCH 05/32] [v2] gbm: Export a getter for per plane handles

2017-01-12 Thread Daniel Stone
Hi,

On 5 January 2017 at 04:43, Ben Widawsky  wrote:
> diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
> index c61d56b44a..f9c1afd8cb 100644
> --- a/src/gbm/backends/dri/gbm_dri.c
> +++ b/src/gbm/backends/dri/gbm_dri.c
> @@ -622,6 +622,33 @@ gbm_dri_bo_get_planes(struct gbm_bo *_bo)
> return get_number_planes(dri, bo->image);
>  }
>
> +static union gbm_bo_handle
> +gbm_dri_bo_get_handle_for_plane(struct gbm_bo *_bo, int plane)
> +{
> +   struct gbm_dri_device *dri = gbm_dri_device(_bo->gbm);
> +   struct gbm_dri_bo *bo = gbm_dri_bo(_bo);
> +   union gbm_bo_handle ret;
> +   ret.s32 = -1;
> +
> +   if (!dri->image || dri->image->base.version < 13 || 
> !dri->image->fromPlanar) {
> +  errno = ENOSYS;
> +  return ret;
> +   }
> +
> +   if (plane >= get_number_planes(dri, bo->image))
> +  return ret;

if (!bo->image) {
   ret.s32 = bo->handle;
   return ret;
}

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >