[Mesa-dev] [PATCH 6/6] i965: remove brw_lower_texture_gradients

2016-11-30 Thread Iago Toral Quiroga
This has been ported to NIR now so we don't need to keep the GLSL IR
lowering any more.
---
 src/mesa/drivers/dri/i965/Makefile.sources |   1 -
 src/mesa/drivers/dri/i965/brw_context.h|   2 -
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |   2 +-
 src/mesa/drivers/dri/i965/brw_link.cpp |   1 -
 .../dri/i965/brw_lower_texture_gradients.cpp   | 353 -
 5 files changed, 1 insertion(+), 358 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 1c33ea5..0a7ba1b 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -128,7 +128,6 @@ i965_FILES = \
brw_gs_state.c \
brw_gs_surface_state.c \
brw_link.cpp \
-   brw_lower_texture_gradients.cpp \
brw_meta_util.c \
brw_meta_util.h \
brw_misc_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 550eefe..46091b6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1707,8 +1707,6 @@ brw_program_reloc(struct brw_context *brw, uint32_t 
state_offset,
 }
 
 bool brw_do_cubemap_normalize(struct exec_list *instructions);
-bool brw_lower_texture_gradients(struct brw_context *brw,
- struct exec_list *instructions);
 
 extern const char * const conditional_modifier[16];
 extern const char *const pred_ctrl_align16[16];
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c5b50e1..4ef1a29 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -700,7 +700,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
 break;
   case SHADER_OPCODE_TXD:
  if (inst->shadow_compare) {
-/* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). 
*/
+/* Gen7.5+.  Otherwise, lowered in NIR */
 assert(devinfo->gen >= 8 || devinfo->is_haswell);
 msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
  } else {
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index 3f6041b..5fa47b8 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -128,7 +128,6 @@ process_glsl_ir(struct brw_context *brw,
   lower_if_to_cond_assign(shader->Stage, shader->ir, 16);
 
do_lower_texture_projection(shader->ir);
-   brw_lower_texture_gradients(brw, shader->ir);
do_vec_index_to_cond_assign(shader->ir);
lower_vector_insert(shader->ir, true);
lower_offset_arrays(shader->ir);
diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp 
b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
deleted file mode 100644
index 0848e1e..000
--- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_lower_texture_gradients.cpp
- */
-
-#include "compiler/glsl/ir.h"
-#include "compiler/glsl/ir_builder.h"
-#include "program/prog_instruction.h"
-#include "brw_context.h"
-
-using namespace ir_builder;
-
-class lower_texture_grad_visitor : public ir_hierarchical_visitor {
-public:
-   lower_texture_grad_visitor(bool has_sample_d_c)
-  : has_sample_d_c(has_sample_d_c)
-   {
-  progress = false;
-   }
-
-   ir_visitor_status visit_leave(ir_texture *ir);
-
-
-   bool progress;
-   bool has_sample_d_c;
-
-private:
-   void emit(ir_variable *, ir_rvalue *);
-   ir_variable *temp(void *ctx, const glsl_type *type, const 

[Mesa-dev] [PATCH 5/6] i965/nir: enable lowering of texture gradient for shadow samplers

2016-11-30 Thread Iago Toral Quiroga
This gets the lowering on the Vulkan driver too, which is required for
hardware that does not have the sample_d_c message (up to IvyBridge).
---
 src/mesa/drivers/dri/i965/brw_nir.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
b/src/mesa/drivers/dri/i965/brw_nir.c
index f6bcd45..55b16cf 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -596,6 +596,9 @@ brw_nir_apply_sampler_key(nir_shader *nir,
  tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
}
 
+   /* Prior to Haswell, we have to lower gradients on shadow samplers */
+   tex_options.lower_txd_shadow = devinfo->gen < 8 && !devinfo->is_haswell;
+
tex_options.lower_y_uv_external = key_tex->y_uv_image_mask;
tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask;
tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] nir/lower_tex: add lowering for texture gradient on shadow samplers

2016-11-30 Thread Iago Toral Quiroga
This is ported from the Intel lowering pass that we use with GLSL IR.
This takes care of lowering texture gradients on shadow samplers other
than cube maps. Intel hardware requires this for gen < 8.
---
 src/compiler/nir/nir.h   |  7 +++
 src/compiler/nir/nir_lower_tex.c | 40 
 2 files changed, 47 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ed388c6..d494d5f 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2437,6 +2437,13 @@ typedef struct nir_lower_tex_options {
 * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
 */
bool lower_txd_cube_map;
+
+   /**
+* If true, lower nir_texop_txd on shadow samplers (except cube maps)
+* with nir_texop_txl. Notice that cube map shadow samplers are lowered
+* with lower_txd_cube_map.
+*/
+   bool lower_txd_shadow;
 } nir_lower_tex_options;
 
 bool nir_lower_tex(nir_shader *shader,
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index c7c3db2..d5ea509 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -556,6 +556,40 @@ lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
 }
 
 static void
+lower_gradient_shadow(nir_builder *b, nir_tex_instr *tex)
+{
+   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
+   assert(tex->is_shadow);
+   assert(tex->op == nir_texop_txd);
+   assert(tex->dest.is_ssa);
+
+   /* Use textureSize() to get the width and height of LOD 0 */
+   nir_ssa_def *size = get_texture_size(b, tex);
+
+   /* Scale the gradients by width and height.  Effectively, the incoming
+* gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
+* GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
+*/
+   nir_ssa_def *dPdx = nir_fmul(b, tex->src[2].src.ssa, size);
+   nir_ssa_def *dPdy = nir_fmul(b, tex->src[3].src.ssa, size);
+
+   nir_ssa_def *rho;
+   if (dPdx->num_components == 1) {
+  rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
+   } else {
+  rho = nir_fmax(b,
+ nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
+ nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
+   }
+
+   /* lod = log2(rho).  We're ignoring GL state biases for now. */
+   nir_ssa_def *lod = nir_flog2(b, rho);
+
+   /* Replace the gradient instruction with an equivalent lod instruction */
+   replace_gradient_with_lod(b, lod, tex);
+}
+
+static void
 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
 {
b->cursor = nir_before_instr(>instr);
@@ -786,6 +820,12 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
  lower_gradient_cube_map(b, tex);
  progress = true;
   }
+
+  if (tex->op == nir_texop_txd && options->lower_txd_shadow &&
+  tex->is_shadow && tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) {
+ lower_gradient_shadow(b, tex);
+ progress = true;
+  }
}
 
return progress;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/6] i965: port texture gradient lowering to NIR

2016-11-30 Thread Iago Toral Quiroga
We need this lowering in Vulkan to do texture gradients with cube map textures
(and also with shadow samplers in IvyBridge).

Currently, we have the brw_lower_texture_gradients() pass that works on GLSL IR
and is called from the OpenGL driver. This pass is actually two passes in one:

1. A lowering pass for cube maps that is required in all cases.
2. A lowering pass for shadow samplers, that is only required in pre-haswell
   hardware.

This series ports the lowering pass to NIR and splits it into two different
passes, one for cube maps and one for shadow samplers other than cube maps.
I think this makes sense, because the implementation for both cases is
different and the conditions to activate the lowering are also different,
at least for Intel.

This fixes various Vulkan CTS failures for cube maps in this category:
dEQP-VK.glsl.texture_functions.texturegrad.*

The first patch just makes the existing get_texture_size() function more
generic so we can reuse it from the gradient lowering pass. The next 2 patches
port and enable the lowering for cubemaps and the next two port and enable the
lowering for shadow samplers (for pre-haswell hardware). The last patch gets
rid of the GLSL IR lowering pass.

Iago Toral Quiroga (6):
  nir/lower_tex: generalize get_texture_size()
  nir/lower_tex: add lowering for texture gradient on cube maps
  i965/nir: enable lowering of texture gradient for cube maps
  nir/lower_tex: add lowering for texture gradient on shadow samplers
  i965/nir: enable lowering of texture gradient for shadow samplers
  i965: remove brw_lower_texture_gradients

 src/compiler/nir/nir.h |  12 +
 src/compiler/nir/nir_lower_tex.c   | 312 +-
 src/mesa/drivers/dri/i965/Makefile.sources |   1 -
 src/mesa/drivers/dri/i965/brw_context.h|   2 -
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |   2 +-
 src/mesa/drivers/dri/i965/brw_link.cpp |   1 -
 .../dri/i965/brw_lower_texture_gradients.cpp   | 353 -
 src/mesa/drivers/dri/i965/brw_nir.c|   4 +
 8 files changed, 324 insertions(+), 363 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] i965/nir: enable lowering of texture gradient for cube maps

2016-11-30 Thread Iago Toral Quiroga
This gets the lowering on the Vulkan driver too.

Fixes Vulkan CTS cube map texture gradient tests in:
dEQP-VK.glsl.texture_functions.texturegrad.*
---
 src/mesa/drivers/dri/i965/brw_nir.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
b/src/mesa/drivers/dri/i965/brw_nir.c
index 763e3ec..f6bcd45 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -467,6 +467,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
   .lower_txp = ~0,
   .lower_txf_offset = true,
   .lower_rect_offset = true,
+  .lower_txd_cube_map = true,
};
 
OPT(nir_lower_tex, _options);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] nir/lower_tex: generalize get_texture_size()

2016-11-30 Thread Iago Toral Quiroga
This was written specifically for RECT samplers. Make it more generic so
we can call this from the gradient lowerings too.
---
 src/compiler/nir/nir_lower_tex.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index 0efd443..ccca59b 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -154,22 +154,27 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex)
 {
b->cursor = nir_before_instr(>instr);
 
-   /* RECT textures should not be array: */
-   assert(!tex->is_array);
-
nir_tex_instr *txs;
 
txs = nir_tex_instr_create(b->shader, 1);
txs->op = nir_texop_txs;
-   txs->sampler_dim = GLSL_SAMPLER_DIM_RECT;
+   txs->sampler_dim = tex->sampler_dim;
+   txs->is_array = tex->is_array;
+   txs->is_shadow = tex->is_shadow;
+   txs->is_new_style_shadow = tex->is_new_style_shadow;
txs->texture_index = tex->texture_index;
+   txs->texture = (nir_deref_var *)
+  nir_copy_deref(txs, >texture->deref);
+   txs->sampler_index = tex->sampler_index;
+   txs->sampler = (nir_deref_var *)
+  nir_copy_deref(txs, >sampler->deref);
txs->dest_type = nir_type_int;
 
/* only single src, the lod: */
txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
txs->src[0].src_type = nir_tex_src_lod;
 
-   nir_ssa_dest_init(>instr, >dest, 2, 32, NULL);
+   nir_ssa_dest_init(>instr, >dest, tex->coord_components, 32, NULL);
nir_builder_instr_insert(b, >instr);
 
return nir_i2f(b, >dest.ssa);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] EGL/android: Enhance pbuffer implementation

2016-11-30 Thread Tomasz Figa
Hi,

On Fri, Nov 25, 2016 at 12:58 PM, Liu Zhiquan  wrote:
> Some dri drivers will pass multiple bits in buffer_mask parameter
> to droid_image_get_buffer(), more than the actual supported buffer
> type combination. For such case, will go through all the bits, and
> will not return error when unsupported buffer is requested, only
> return error when the allocation for supported buffer failed.

Please see my comments inline.

>
> Signed-off-by: Liu Zhiquan 
> Signed-off-by: Long, Zhifang 
> ---
>  src/egl/drivers/dri2/platform_android.c | 209 
> +++-
>  1 file changed, 126 insertions(+), 83 deletions(-)
>
> diff --git a/src/egl/drivers/dri2/platform_android.c 
> b/src/egl/drivers/dri2/platform_android.c
> index 373e2c0..c70423d 100644
> --- a/src/egl/drivers/dri2/platform_android.c
> +++ b/src/egl/drivers/dri2/platform_android.c
> @@ -392,13 +392,13 @@ droid_destroy_surface(_EGLDriver *drv, _EGLDisplay 
> *disp, _EGLSurface *surf)
> }
>
> if (dri2_surf->dri_image_back) {
> -  _eglLog(_EGL_DEBUG, "%s : %d : destroy dri_image_back", __func__, 
> __LINE__);
> +  _eglLog(_EGL_DEBUG, "destroy dri_image_back");
>dri2_dpy->image->destroyImage(dri2_surf->dri_image_back);
>dri2_surf->dri_image_back = NULL;
> }
>
> if (dri2_surf->dri_image_front) {
> -  _eglLog(_EGL_DEBUG, "%s : %d : destroy dri_image_front", __func__, 
> __LINE__);
> +  _eglLog(_EGL_DEBUG, "destroy dri_image_front");

Patch description mentions only a change to handle multiple buffer
bits. Any other changes should be sent in separate patches.

>dri2_dpy->image->destroyImage(dri2_surf->dri_image_front);
>dri2_surf->dri_image_front = NULL;
> }
> @@ -434,81 +434,98 @@ update_buffers(struct dri2_egl_surface *dri2_surf)
>  }
>
>  static int
> -get_back_bo(struct dri2_egl_surface *dri2_surf)
> +get_front_bo(struct dri2_egl_surface *dri2_surf, unsigned int format)
>  {
> struct dri2_egl_display *dri2_dpy =
>dri2_egl_display(dri2_surf->base.Resource.Display);
> -   int fourcc, pitch;
> -   int offset = 0, fd;
>
> -   if (dri2_surf->dri_image_back)
> +   if (dri2_surf->dri_image_front)
> +   {

style: This file seems to follow the convention of opening brace at
the same line as the statement.

> +  _eglLog(_EGL_WARNING, "dri2_image_front allocated !");

This is a normal case, there is no need to print anything here.

>return 0;
> -
> -   if (!dri2_surf->buffer)
> -  return -1;
> -
> -   fd = get_native_buffer_fd(dri2_surf->buffer);
> -   if (fd < 0) {
> -  _eglLog(_EGL_WARNING, "Could not get native buffer FD");
> -  return -1;
> }
>
> -   fourcc = get_fourcc(dri2_surf->buffer->format);
> -
> -   pitch = dri2_surf->buffer->stride *
> -  get_format_bpp(dri2_surf->buffer->format);
> -
> -   if (fourcc == -1 || pitch == 0) {
> -  _eglLog(_EGL_WARNING, "Invalid buffer fourcc(%x) or pitch(%d)",
> -  fourcc, pitch);
> -  return -1;
> +   if (dri2_surf->base.Type == EGL_WINDOW_BIT) {
> +  /* According current EGL spec, front buffer rendering
> +   * for window surface is not supported now.
> +   * and mesa doesn't have the implemetation of this case.

typo: s/implemetation/implementation/

> +   * Add warnning message, but not treat it as error.

typo: s/warnning/warning/

> +   */
> +   _eglLog(_EGL_DEBUG, "front buffer for window surface is not supported 
> now !");

nit: No need for exclamation mark. Also the message could be a bit
more informational, e.g.

"DRI driver requested unsupported front buffer for window surface"

> +

We can just return 0 here, no need to fall through to the end of the function.

> +   } else if (dri2_surf->base.Type == EGL_PBUFFER_BIT) {

We won't be called with anything other than the window or pbuffer bit here,
because we don't advertise pixmap support and createPixmapSurface is
stubbed out. For better coding style (less indentation) it's enough to
just return 0 in the if above and then move the code below out of the
conditional block completely.

> +
> +   dri2_surf->dri_image_front =
> +  dri2_dpy->image->createImage(dri2_dpy->dri_screen,
> +  dri2_surf->base.Width,
> +  dri2_surf->base.Height,
> +  format,
> +  0,
> +  dri2_surf);
> +  if (!dri2_surf->dri_image_front)
> +  {

Style: Brace should be on the same line as the if statement.

> + _eglLog(_EGL_WARNING, "dri2_image_front allocation failed !");

No need for exclamation mark.

> + return -1;
> +  }
> +   } else {
> +  _eglLog(_EGL_WARNING, "pixmap is not supported now !");
> }

This else block is not needed, as I explained above.

>
> -   dri2_surf->dri_image_back =

[Mesa-dev] [PATCH v2 1/7] intel/blorp_blit: Create structure for src & dst coordinates

2016-11-30 Thread Jordan Justen
This will be useful for splitting blits into smaller sizes.

We also make the coordinates of type double rather than float. Since
we will be splitting and scaling the coordinates, we might require
extra precision in the calculations.

v2:
 * Use double instead of float. (Jason)

Signed-off-by: Jordan Justen 
---
 src/intel/blorp/blorp_blit.c | 75 +---
 1 file changed, 56 insertions(+), 19 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index d8d022d..ba73016 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1486,15 +1486,20 @@ surf_retile_w_to_y(const struct isl_device *isl_dev,
info->tile_y_sa /= 2;
 }
 
+struct blt_axis {
+   double src0, src1, dst0, dst1;
+   bool mirror;
+};
+
+struct blt_coords {
+   struct blt_axis x, y;
+};
+
 static void
 do_blorp_blit(struct blorp_batch *batch,
   struct blorp_params *params,
   struct brw_blorp_blit_prog_key *wm_prog_key,
-  float src_x0, float src_y0,
-  float src_x1, float src_y1,
-  float dst_x0, float dst_y0,
-  float dst_x1, float dst_y1,
-  bool mirror_x, bool mirror_y)
+  const struct blt_coords *coords)
 {
const struct gen_device_info *devinfo = batch->blorp->isl_dev->info;
 
@@ -1521,15 +1526,19 @@ do_blorp_blit(struct blorp_batch *batch,
/* Round floating point values to nearest integer to avoid "off by one 
texel"
 * kind of errors when blitting.
 */
-   params->x0 = params->wm_inputs.discard_rect.x0 = roundf(dst_x0);
-   params->y0 = params->wm_inputs.discard_rect.y0 = roundf(dst_y0);
-   params->x1 = params->wm_inputs.discard_rect.x1 = roundf(dst_x1);
-   params->y1 = params->wm_inputs.discard_rect.y1 = roundf(dst_y1);
+   params->x0 = params->wm_inputs.discard_rect.x0 = roundf(coords->x.dst0);
+   params->y0 = params->wm_inputs.discard_rect.y0 = roundf(coords->y.dst0);
+   params->x1 = params->wm_inputs.discard_rect.x1 = roundf(coords->x.dst1);
+   params->y1 = params->wm_inputs.discard_rect.y1 = roundf(coords->y.dst1);
 
brw_blorp_setup_coord_transform(>wm_inputs.coord_transform[0],
-   src_x0, src_x1, dst_x0, dst_x1, mirror_x);
+   coords->x.src0, coords->x.src1,
+   coords->x.dst0, coords->x.dst1,
+   coords->x.mirror);
brw_blorp_setup_coord_transform(>wm_inputs.coord_transform[1],
-   src_y0, src_y1, dst_y0, dst_y1, mirror_y);
+   coords->y.src0, coords->y.src1,
+   coords->y.dst0, coords->y.dst1,
+   coords->y.mirror);
 
if (devinfo->gen > 6 &&
params->dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
@@ -1771,10 +1780,24 @@ blorp_blit(struct blorp_batch *batch,
   minify(params.src.surf.logical_level0_px.height, src_level) *
   wm_prog_key.y_scale - 1.0f;
 
-   do_blorp_blit(batch, , _prog_key,
- src_x0, src_y0, src_x1, src_y1,
- dst_x0, dst_y0, dst_x1, dst_y1,
- mirror_x, mirror_y);
+   struct blt_coords coords = {
+  .x = {
+ .src0 = src_x0,
+ .src1 = src_x1,
+ .dst0 = dst_x0,
+ .dst1 = dst_x1,
+ .mirror = mirror_x
+  },
+  .y = {
+ .src0 = src_y0,
+ .src1 = src_y1,
+ .dst0 = dst_y0,
+ .dst1 = dst_y1,
+ .mirror = mirror_y
+  }
+   };
+
+   do_blorp_blit(batch, , _prog_key, );
 
if (isl_surface_debug_dump)
   blorp_surf_dump(batch->blorp, dst_surf, "blorp-blit-dst-after");
@@ -2091,10 +2114,24 @@ blorp_copy(struct blorp_batch *batch,
   wm_prog_key.need_dst_offset = true;
}
 
-   do_blorp_blit(batch, , _prog_key,
- src_x, src_y, src_x + src_width, src_y + src_height,
- dst_x, dst_y, dst_x + dst_width, dst_y + dst_height,
- false, false);
+   struct blt_coords coords = {
+  .x = {
+ .src0 = src_x,
+ .src1 = src_x + src_width,
+ .dst0 = dst_x,
+ .dst1 = dst_x + dst_width,
+ .mirror = false
+  },
+  .y = {
+ .src0 = src_y,
+ .src1 = src_y + src_height,
+ .dst0 = dst_y,
+ .dst1 = dst_y + dst_height,
+ .mirror = false
+  }
+   };
+
+   do_blorp_blit(batch, , _prog_key, );
 
if (isl_surface_debug_dump)
   blorp_surf_dump(batch->blorp, dst_surf, "blorp-copy-dst-after");
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 7/7] i965: Increase max texture to 16k for gen7+

2016-11-30 Thread Jordan Justen
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98297
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index b928f94..4ca77c7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -527,14 +527,21 @@ brw_initialize_context_constants(struct brw_context *brw)
 
ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
-   ctx->Const.MaxRenderbufferSize = 8192;
-   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
+   if (brw->gen >= 7) {
+  ctx->Const.MaxRenderbufferSize = 16384;
+  ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS);
+  ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
+   } else {
+  ctx->Const.MaxRenderbufferSize = 8192;
+  ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
+  ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
+   }
ctx->Const.Max3DTextureLevels = 12; /* 2048 */
-   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
ctx->Const.MaxTextureMbytes = 1536;
ctx->Const.MaxTextureRectSize = 1 << 12;
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+   ctx->Const.MaxTextureLodBias = 15.0;
ctx->Const.StripTextureBorder = true;
if (brw->gen >= 7) {
   ctx->Const.MaxProgramTextureGatherComponents = 4;
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/7] intel/blorp_blit: Split blorp blits if they are too large

2016-11-30 Thread Jordan Justen
We rename do_blorp_blit() to try_blorp_blit(), and add a return error
if the surface size for the blit is too large. Now, do_blorp_blit() is
rewritten to try to split the blit into smaller operations if
try_blorp_blit() fails.

Note: In this commit, try_blorp_blit() will always attempt to blit and
never return an error, which matches the previous behavior. We will
enable the size checking and splitting in a future commit.

The motivation for this splitting is that in some cases when we
flatten an image, its dimensions grow, and this can then exceed the
programmable hardware limits. An example is w-tiled+MSAA blits.

v2:
 * Use double instead of float. (Jason)

Signed-off-by: Jordan Justen 
---
 src/intel/blorp/blorp_blit.c | 93 +---
 1 file changed, 87 insertions(+), 6 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index ba73016..b12a4ec 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1495,11 +1495,18 @@ struct blt_coords {
struct blt_axis x, y;
 };
 
-static void
-do_blorp_blit(struct blorp_batch *batch,
-  struct blorp_params *params,
-  struct brw_blorp_blit_prog_key *wm_prog_key,
-  const struct blt_coords *coords)
+#define BLIT_WIDTH_TOO_LARGE  1
+#define BLIT_HEIGHT_TOO_LARGE 2
+
+/* Try to blit. If the surface parameters exceed the size allowed by hardware,
+ * then a bitmask of BLIT_WIDTH_TOO_LARGE and BLIT_HEIGHT_TOO_LARGE will be
+ * returned. If 0 is returned, then the blit was successful.
+ */
+static unsigned
+try_blorp_blit(struct blorp_batch *batch,
+   struct blorp_params *params,
+   struct brw_blorp_blit_prog_key *wm_prog_key,
+   const struct blt_coords *coords)
 {
const struct gen_device_info *devinfo = batch->blorp->isl_dev->info;
 
@@ -1702,7 +1709,81 @@ do_blorp_blit(struct blorp_batch *batch,
 
brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key);
 
-   batch->blorp->exec(batch, params);
+   unsigned result = 0;
+
+   if (result == 0) {
+  batch->blorp->exec(batch, params);
+   }
+
+   return result;
+}
+
+/* Adjust split blit source coordinates for the current destination
+ * coordinates.
+ */
+static void
+adjust_split_coords(const struct blt_axis *orig,
+struct blt_axis *split_coords,
+double scale)
+{
+   double delta0 = scale * (split_coords->dst0 - orig->dst0);
+   double delta1 = scale * (split_coords->dst1 - orig->dst1);
+   split_coords->src0 = orig->src0 + (scale >= 0.0 ? delta0 : delta1);
+   split_coords->src1 = orig->src1 + (scale >= 0.0 ? delta1 : delta0);
+}
+
+static void
+do_blorp_blit(struct blorp_batch *batch,
+  struct blorp_params *params,
+  struct brw_blorp_blit_prog_key *wm_prog_key,
+  const struct blt_coords *orig)
+{
+   struct blt_coords split_coords = *orig;
+   double w = orig->x.dst1 - orig->x.dst0;
+   double h = orig->y.dst1 - orig->y.dst0;
+   double x_scale = (orig->x.src1 - orig->x.src0) / w;
+   double y_scale = (orig->y.src1 - orig->y.src0) / h;
+   if (orig->x.mirror)
+  x_scale = -x_scale;
+   if (orig->y.mirror)
+  y_scale = -y_scale;
+
+   bool x_done, y_done;
+   do {
+  unsigned result =
+ try_blorp_blit(batch, params, wm_prog_key, _coords);
+
+  if (result & BLIT_WIDTH_TOO_LARGE) {
+ w /= 2.0;
+ assert(w >= 1.0);
+ split_coords.x.dst1 = MIN2(split_coords.x.dst0 + w, orig->x.dst1);
+ adjust_split_coords(>x, _coords.x, x_scale);
+  }
+  if (result & BLIT_HEIGHT_TOO_LARGE) {
+ h /= 2.0;
+ assert(h >= 1.0);
+ split_coords.y.dst1 = MIN2(split_coords.y.dst0 + h, orig->y.dst1);
+ adjust_split_coords(>y, _coords.y, y_scale);
+  }
+
+  if (result != 0)
+ continue;
+
+  y_done = (orig->y.dst1 - split_coords.y.dst1 < 0.5);
+  x_done = y_done && (orig->x.dst1 - split_coords.x.dst1 < 0.5);
+  if (x_done) {
+ break;
+  } else if (y_done) {
+ split_coords.x.dst0 += w;
+ split_coords.x.dst1 = MIN2(split_coords.x.dst0 + w, orig->x.dst1);
+ split_coords.y.dst0 = orig->y.dst0;
+ adjust_split_coords(>x, _coords.x, x_scale);
+  } else {
+ split_coords.y.dst0 += h;
+ split_coords.y.dst1 = MIN2(split_coords.y.dst0 + h, orig->y.dst1);
+ adjust_split_coords(>y, _coords.y, y_scale);
+  }
+   } while (true);
 }
 
 void
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/7] intel/blorp_blit: Adjust blorp surface parameters for split blits

2016-11-30 Thread Jordan Justen
If try_blorp_blit() previously returned that a blit was too large,
shrink_surface_params() will be used to update the surface parameters
for the smaller blit so the blit operation can proceed.

v2:
 * Use double instead of float. (Jason)

Signed-off-by: Jordan Justen 
---
 src/intel/blorp/blorp_blit.c | 108 +--
 1 file changed, 105 insertions(+), 3 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index b12a4ec..5ca3190 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1486,6 +1486,12 @@ surf_retile_w_to_y(const struct isl_device *isl_dev,
info->tile_y_sa /= 2;
 }
 
+static bool
+can_shrink_surfaces(const struct blorp_params *params)
+{
+   return false;
+}
+
 struct blt_axis {
double src0, src1, dst0, dst1;
bool mirror;
@@ -1732,12 +1738,98 @@ adjust_split_coords(const struct blt_axis *orig,
split_coords->src1 = orig->src1 + (scale >= 0.0 ? delta1 : delta0);
 }
 
+static const struct isl_extent2d
+get_px_size_sa(const struct isl_surf *surf)
+{
+   static const struct isl_extent2d one_to_one = { .w = 1, .h = 1 };
+
+   if (surf->msaa_layout != ISL_MSAA_LAYOUT_INTERLEAVED)
+  return one_to_one;
+   else
+  return isl_get_interleaved_msaa_px_size_sa(surf->samples);
+}
+
+static void
+shrink_surface_params(const struct isl_device *dev,
+  struct brw_blorp_surface_info *info,
+  double *x0, double *x1, double *y0, double *y1)
+{
+   uint32_t byte_offset, x_offset_sa, y_offset_sa, size;
+   struct isl_extent2d px_size_sa;
+   int adjust;
+
+   surf_convert_to_single_slice(dev, info);
+
+   px_size_sa = get_px_size_sa(>surf);
+
+   x_offset_sa = (uint32_t)*x0 * px_size_sa.w + info->tile_x_sa;
+   y_offset_sa = (uint32_t)*y0 * px_size_sa.h + info->tile_y_sa;
+   isl_tiling_get_intratile_offset_sa(dev, info->surf.tiling,
+  info->surf.format, info->surf.row_pitch,
+  x_offset_sa, y_offset_sa,
+  _offset,
+  >tile_x_sa, >tile_y_sa);
+
+   info->addr.offset += byte_offset;
+
+   adjust = (int)info->tile_x_sa / px_size_sa.w - (int)*x0;
+   *x0 += adjust;
+   *x1 += adjust;
+   info->tile_x_sa = 0;
+
+   adjust = (int)info->tile_y_sa / px_size_sa.h - (int)*y0;
+   *y0 += adjust;
+   *y1 += adjust;
+   info->tile_y_sa = 0;
+
+   size = MIN2((uint32_t)ceilf(*x1), info->surf.logical_level0_px.width);
+   double phys_scale =
+  info->surf.phys_level0_sa.width / info->surf.logical_level0_px.width;
+   info->surf.logical_level0_px.width = size;
+   info->surf.phys_level0_sa.width =
+  (uint32_t)ceilf(phys_scale * size);
+
+   size = MIN2((uint32_t)ceilf(*y1), info->surf.logical_level0_px.height);
+   phys_scale =
+  info->surf.phys_level0_sa.height / info->surf.logical_level0_px.height;
+   info->surf.logical_level0_px.height = (uint32_t)ceilf(size);
+   info->surf.phys_level0_sa.height =
+  (uint32_t)ceilf(phys_scale * size);
+}
+
+static void
+shrink_surfaces(const struct isl_device *dev,
+struct blorp_params *params,
+struct brw_blorp_blit_prog_key *wm_prog_key,
+struct blt_coords *coords)
+{
+   /* Shrink source surface */
+   shrink_surface_params(dev,
+ >src,
+ >x.src0,
+ >x.src1,
+ >y.src0,
+ >y.src1);
+   wm_prog_key->need_src_offset = false;
+
+   /* Shrink destination surface */
+   shrink_surface_params(dev,
+ >dst,
+ >x.dst0,
+ >x.dst1,
+ >y.dst0,
+ >y.dst1);
+   wm_prog_key->need_dst_offset = wm_prog_key->dst_rgb;
+}
+
 static void
 do_blorp_blit(struct blorp_batch *batch,
-  struct blorp_params *params,
+  const struct blorp_params *orig_params,
   struct brw_blorp_blit_prog_key *wm_prog_key,
   const struct blt_coords *orig)
 {
+   struct blorp_params params;
+   struct blt_coords blit_coords;
struct blt_coords split_coords = *orig;
double w = orig->x.dst1 - orig->x.dst0;
double h = orig->y.dst1 - orig->y.dst0;
@@ -1749,9 +1841,16 @@ do_blorp_blit(struct blorp_batch *batch,
   y_scale = -y_scale;
 
bool x_done, y_done;
+   bool shrink = false;
do {
+  params = *orig_params;
+  blit_coords = split_coords;
+  if (shrink) {
+ shrink_surfaces(batch->blorp->isl_dev, &params, wm_prog_key,
+ &blit_coords);
+  }
   unsigned result =
- try_blorp_blit(batch, params, wm_prog_key, &split_coords);
+ try_blorp_blit(batch, &params, wm_prog_key, &blit_coords);
 
   if (result & BLIT_WIDTH_TOO_LARGE) {
  w /= 2.0;
@@ -1766,8 +1865,11 @@ do_blorp_blit(struct blorp_batch 

[Mesa-dev] [PATCH v2 6/7] intel/blorp_blit: Add split_blorp_blit_debug switch

2016-11-30 Thread Jordan Justen
Enabling this debug switch causes surface shrinking to happen by
default, and lowers the surface size limit which causes blorp blits to
be split.

Signed-off-by: Jordan Justen 
---
 src/intel/blorp/blorp_blit.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 58e9e0c..25b93c6 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -29,6 +29,7 @@
 #define FILE_DEBUG_FLAG DEBUG_BLORP
 
 static const bool isl_surface_debug_dump = false;
+static const bool split_blorp_blit_debug = false;
 
 /**
  * Enum to specify the order of arguments in a sampler message
@@ -1515,9 +1516,13 @@ can_shrink_surfaces(const struct blorp_params *params)
 }
 
 static unsigned
-get_max_surface_size()
+get_max_surface_size(const struct gen_device_info *devinfo,
+ const struct blorp_params *params)
 {
-   return 16384;
+   if (split_blorp_blit_debug && can_shrink_surfaces(params))
+  return 16384 >> 4; /* A smaller restriction when debug is enabled */
+   else
+  return 16384;
 }
 
 struct blt_axis {
@@ -1938,7 +1943,7 @@ do_blorp_blit(struct blorp_batch *batch,
   y_scale = -y_scale;
 
bool x_done, y_done;
-   bool shrink = false;
+   bool shrink = split_blorp_blit_debug && can_shrink_surfaces(orig_params);
do {
   params = *orig_params;
   blit_coords = split_coords;
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/7] intel/blorp_blit: Enable splitting large blorp blits

2016-11-30 Thread Jordan Justen
Detect when the surface sizes are too large for a blorp blit. When they
are too large, the blorp blit will be split into smaller operations
and attempted again.

For gen7, this fixes the cts test:

ES3-CTS.gtf.GL3Tests.framebuffer_blit.framebuffer_blit_functionality_multisampled_to_singlesampled_blit

It will also enable us to increase our renderable size from 8k x 8k to
16k x 16k.

Signed-off-by: Jordan Justen 
---
 src/intel/blorp/blorp_blit.c | 37 -
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 782144a..58e9e0c 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1487,9 +1487,37 @@ surf_retile_w_to_y(const struct isl_device *isl_dev,
 }
 
 static bool
+can_shrink_surface(const struct brw_blorp_surface_info *surf)
+{
+   return
+  /* The current code doesn't support offsets into the aux buffers. This
+   * should be possible, but we need to make sure the offset is page
+   * aligned for both the surface and the aux buffer surface. Generally
+   * this means using the page aligned offset for the aux buffer.
+   *
+   * Currently the cases where we must split the blit are limited to cases
+   * where we don't have an aux buffer.
+   */
+  surf->aux_addr.buffer == NULL &&
+  /* We can't support splitting the blit for gen <= 7, because the qpitch
+   * size is calculated by the hardware based on the surface height for
+   * gen <= 7. In gen >= 8, the qpitch is controlled by the driver.
+   */
+  surf->surf.msaa_layout != ISL_MSAA_LAYOUT_ARRAY;
+}
+
+static bool
 can_shrink_surfaces(const struct blorp_params *params)
 {
-   return false;
+   return
+  can_shrink_surface(&params->src) &&
+  can_shrink_surface(&params->dst);
+}
+
+static unsigned
+get_max_surface_size()
+{
+   return 16384;
 }
 
 struct blt_axis {
@@ -1716,6 +1744,13 @@ try_blorp_blit(struct blorp_batch *batch,
brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key);
 
unsigned result = 0;
+   unsigned max_surface_size = get_max_surface_size(devinfo, params);
+   if (params->src.surf.logical_level0_px.width > max_surface_size ||
+   params->dst.surf.logical_level0_px.width > max_surface_size)
+  result |= BLIT_WIDTH_TOO_LARGE;
+   if (params->src.surf.logical_level0_px.height > max_surface_size ||
+   params->dst.surf.logical_level0_px.height > max_surface_size)
+  result |= BLIT_HEIGHT_TOO_LARGE;
 
if (result == 0) {
   batch->blorp->exec(batch, params);
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/7] intel/blorp_blit: Move RGB=>R conversion to follow blit splitting

2016-11-30 Thread Jordan Justen
In blorp_copy, when RGB surfaces are copied, we convert the
destination surface to a Red only surface, but 3 times as wide. This
introduces an implicit restriction of "mod 3" for the destination
width.

It is easier to handle the blorp split buffer offsetting with the
original RGB surface, and do the RGB=>R after this.

Suggested-by: Jason Ekstrand 
Signed-off-by: Jordan Justen 
Cc: Jason Ekstrand 
---
 src/intel/blorp/blorp_blit.c | 113 +--
 1 file changed, 66 insertions(+), 47 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 5ca3190..782144a 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1823,6 +1823,68 @@ shrink_surfaces(const struct isl_device *dev,
 }
 
 static void
+surf_fake_rgb_with_red(const struct isl_device *isl_dev,
+   struct brw_blorp_surface_info *info,
+   uint32_t *x, uint32_t *width)
+{
+   surf_convert_to_single_slice(isl_dev, info);
+
+   info->surf.logical_level0_px.width *= 3;
+   info->surf.phys_level0_sa.width *= 3;
+   *x *= 3;
+   *width *= 3;
+
+   enum isl_format red_format;
+   switch (info->view.format) {
+   case ISL_FORMAT_R8G8B8_UNORM:
+  red_format = ISL_FORMAT_R8_UNORM;
+  break;
+   case ISL_FORMAT_R8G8B8_UINT:
+  red_format = ISL_FORMAT_R8_UINT;
+  break;
+   case ISL_FORMAT_R16G16B16_UNORM:
+  red_format = ISL_FORMAT_R16_UNORM;
+  break;
+   case ISL_FORMAT_R16G16B16_UINT:
+  red_format = ISL_FORMAT_R16_UINT;
+  break;
+   case ISL_FORMAT_R32G32B32_UINT:
+  red_format = ISL_FORMAT_R32_UINT;
+  break;
+   default:
+  unreachable("Invalid RGB copy destination format");
+   }
+   assert(isl_format_get_layout(red_format)->channels.r.type ==
+  isl_format_get_layout(info->view.format)->channels.r.type);
+   assert(isl_format_get_layout(red_format)->channels.r.bits ==
+  isl_format_get_layout(info->view.format)->channels.r.bits);
+
+   info->surf.format = info->view.format = red_format;
+}
+
+static void
+fake_dest_rgb_with_red(const struct isl_device *dev,
+   struct blorp_params *params,
+   struct brw_blorp_blit_prog_key *wm_prog_key,
+   struct blt_coords *coords)
+{
+   /* Handle RGB destinations for blorp_copy */
+   const struct isl_format_layout *dst_fmtl =
+  isl_format_get_layout(params->dst.surf.format);
+
+   if (dst_fmtl->bpb % 3 == 0) {
+  uint32_t dst_x = coords->x.dst0;
+  uint32_t dst_width = coords->x.dst1 - dst_x;
+  surf_fake_rgb_with_red(dev, &params->dst,
+ &dst_x, &dst_width);
+  coords->x.dst0 = dst_x;
+  coords->x.dst1 = dst_x + dst_width;
+  wm_prog_key->dst_rgb = true;
+  wm_prog_key->need_dst_offset = true;
+   }
+}
+
+static void
 do_blorp_blit(struct blorp_batch *batch,
   const struct blorp_params *orig_params,
   struct brw_blorp_blit_prog_key *wm_prog_key,
@@ -1849,6 +1911,10 @@ do_blorp_blit(struct blorp_batch *batch,
  shrink_surfaces(batch->blorp->isl_dev, &params, wm_prog_key,
  &blit_coords);
   }
+
+  fake_dest_rgb_with_red(batch->blorp->isl_dev, &params, wm_prog_key,
+ &blit_coords);
+
   unsigned result =
  try_blorp_blit(batch, &params, wm_prog_key, &blit_coords);
 
@@ -2161,46 +2227,6 @@ surf_convert_to_uncompressed(const struct isl_device 
*isl_dev,
info->surf.format = get_copy_format_for_bpb(isl_dev, fmtl->bpb);
 }
 
-static void
-surf_fake_rgb_with_red(const struct isl_device *isl_dev,
-   struct brw_blorp_surface_info *info,
-   uint32_t *x, uint32_t *width)
-{
-   surf_convert_to_single_slice(isl_dev, info);
-
-   info->surf.logical_level0_px.width *= 3;
-   info->surf.phys_level0_sa.width *= 3;
-   *x *= 3;
-   *width *= 3;
-
-   enum isl_format red_format;
-   switch (info->view.format) {
-   case ISL_FORMAT_R8G8B8_UNORM:
-  red_format = ISL_FORMAT_R8_UNORM;
-  break;
-   case ISL_FORMAT_R8G8B8_UINT:
-  red_format = ISL_FORMAT_R8_UINT;
-  break;
-   case ISL_FORMAT_R16G16B16_UNORM:
-  red_format = ISL_FORMAT_R16_UNORM;
-  break;
-   case ISL_FORMAT_R16G16B16_UINT:
-  red_format = ISL_FORMAT_R16_UINT;
-  break;
-   case ISL_FORMAT_R32G32B32_UINT:
-  red_format = ISL_FORMAT_R32_UINT;
-  break;
-   default:
-  unreachable("Invalid RGB copy destination format");
-   }
-   assert(isl_format_get_layout(red_format)->channels.r.type ==
-  isl_format_get_layout(info->view.format)->channels.r.type);
-   assert(isl_format_get_layout(red_format)->channels.r.bits ==
-  isl_format_get_layout(info->view.format)->channels.r.bits);
-
-   info->surf.format = info->view.format = red_format;
-}
-
 void
 blorp_copy(struct blorp_batch *batch,
const struct blorp_surf *src_surf,
@@ 

[Mesa-dev] [PATCH v2 0/7] Split blorp blits; Enable 16k render target on gen7+

2016-11-30 Thread Jordan Justen
git://people.freedesktop.org/~jljusten/mesa split-blorp-blit-v2

(These patches have been applied on top of my ISL surface dumping
patches which are included in the branch above.)

v1:
 https://lists.freedesktop.org/archives/mesa-dev/2016-November/136235.html

v2:
 * Simplify the logic to shrink the surface by moving the RGB=>R
   conversion for blorp_copy to happen after the surface shrinking.
   (Jason)
 * Use doubles during the splitting calculations. (Jason)

This series allows large blorp blits to be split into multiple smaller
blits.

For Haswell, this fixes some OpenGL ES 3.1 CTS tests with stencil.

For gen7+ we also increase the maximum texture/render target size to
16k x 16k. (Up from 8k x 8k.) Related bugzilla:

https://bugs.freedesktop.org/show_bug.cgi?id=98297

Jordan Justen (7):
  intel/blorp_blit: Create structure for src & dst coordinates
  intel/blorp_blit: Split blorp blits if they are too large
  intel/blorp_blit: Adjust blorp surface parameters for split blits
  intel/blorp_blit: Move RGB=>R conversion to follow blit splitting
  intel/blorp_blit: Enable splitting large blorp blits
  intel/blorp_blit: Add split_blorp_blit_debug switch
  i965: Increase max texture to 16k for gen7+

 src/intel/blorp/blorp_blit.c| 419 ++--
 src/mesa/drivers/dri/i965/brw_context.c |  13 +-
 2 files changed, 359 insertions(+), 73 deletions(-)

-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Add i965 plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Chris Forbes
A couple of notes on existing weirdness here:
- Naming of GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT is bizarre (not your
fault)
- Is BRW_PSICMS_INNER really the right thing for the normal mode? Why not
BRW_PSICMS_NORMAL? Perhaps whoever added this stuff can shed some light
here?

Actual change here looks good, so:

Reviewed-by: Chris Forbes 


On Thu, Dec 1, 2016 at 9:00 AM, Plamena Manolova  wrote:

> This extension allows the fragment shader to control whether values in
> gl_SampleMaskIn[] reflect the coverage after application of the early
> depth and stencil tests.
>
> Signed-off-by: Plamena Manolova 
> ---
>  docs/relnotes/13.1.0.html|  1 +
>  src/mesa/drivers/dri/i965/brw_compiler.h |  1 +
>  src/mesa/drivers/dri/i965/brw_fs.cpp |  1 +
>  src/mesa/drivers/dri/i965/gen8_ps_state.c| 13 ++---
>  src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
>  5 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/docs/relnotes/13.1.0.html b/docs/relnotes/13.1.0.html
> index 4f76cc2..a160cda 100644
> --- a/docs/relnotes/13.1.0.html
> +++ b/docs/relnotes/13.1.0.html
> @@ -45,6 +45,7 @@ Note: some of the new features are only available with
> certain drivers.
>
>  
>  GL_NV_image_formats on any driver supporting
> GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)
> +GL_ARB_post_depth_coverage on i965/gen9+
>  
>
>  Bug fixes
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h
> b/src/mesa/drivers/dri/i965/brw_compiler.h
> index 65a7478..410641f 100644
> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> @@ -397,6 +397,7 @@ struct brw_wm_prog_data {
> bool computed_stencil;
>
> bool early_fragment_tests;
> +   bool post_depth_coverage;
> bool dispatch_8;
> bool dispatch_16;
> bool dual_src_blend;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index c218f56..ce0c07e 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -6454,6 +6454,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
> void *log_data,
> shader->info->outputs_read);
>
> prog_data->early_fragment_tests = shader->info->fs.early_
> fragment_tests;
> +   prog_data->post_depth_coverage = shader->info->fs.post_depth_coverage;
>
> prog_data->barycentric_interp_modes =
>brw_compute_barycentric_interp_modes(compiler->devinfo, shader);
> diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c
> b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> index a4eb962..33ef023 100644
> --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> @@ -53,10 +53,17 @@ gen8_upload_ps_extra(struct brw_context *brw,
>dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
>
> if (prog_data->uses_sample_mask) {
> -  if (brw->gen >= 9)
> - dw1 |= BRW_PSICMS_INNER << GEN9_PSX_SHADER_NORMAL_
> COVERAGE_MASK_SHIFT;
> -  else
> +  if (brw->gen >= 9) {
> + if (prog_data->post_depth_coverage) {
> +dw1 |= BRW_PCICMS_DEPTH << GEN9_PSX_SHADER_NORMAL_
> COVERAGE_MASK_SHIFT;
> + }
> + else {
> +dw1 |= BRW_PSICMS_INNER << GEN9_PSX_SHADER_NORMAL_
> COVERAGE_MASK_SHIFT;
> + }
> +  }
> +  else {
>   dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
> +  }
> }
>
> if (prog_data->uses_omask)
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> index 66079b5..19f4684 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -415,6 +415,7 @@ intelInitExtensions(struct gl_context *ctx)
>ctx->Extensions.KHR_texture_compression_astc_ldr = true;
>ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true;
>ctx->Extensions.MESA_shader_framebuffer_fetch = true;
> +  ctx->Extensions.ARB_post_depth_coverage = true;
> }
>
> if (ctx->API == API_OPENGL_CORE)
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/5] intel/blorp_blit: Add option to dump surfaces on blits

2016-11-30 Thread Jordan Justen
Enabling this option causes the source and destination surfaces to be
dumped out to debug files. The destination is dumped both before and
after the blit operation.

Signed-off-by: Jordan Justen 
---
 src/intel/blorp/blorp_blit.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 69b98c2..d8d022d 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -28,6 +28,8 @@
 
 #define FILE_DEBUG_FLAG DEBUG_BLORP
 
+static const bool isl_surface_debug_dump = false;
+
 /**
  * Enum to specify the order of arguments in a sampler message
  */
@@ -1711,6 +1713,11 @@ blorp_blit(struct blorp_batch *batch,
struct blorp_params params;
blorp_params_init(&params);
 
+   if (isl_surface_debug_dump) {
+  blorp_surf_dump(batch->blorp, src_surf, "blorp-blit-src");
+  blorp_surf_dump(batch->blorp, dst_surf, "blorp-blit-dst-before");
+   }
+
brw_blorp_surface_info_init(batch->blorp, &params.src, src_surf, src_level,
src_layer, src_format, false);
brw_blorp_surface_info_init(batch->blorp, &params.dst, dst_surf, dst_level,
@@ -1768,6 +1775,9 @@ blorp_blit(struct blorp_batch *batch,
  src_x0, src_y0, src_x1, src_y1,
  dst_x0, dst_y0, dst_x1, dst_y1,
  mirror_x, mirror_y);
+
+   if (isl_surface_debug_dump)
+  blorp_surf_dump(batch->blorp, dst_surf, "blorp-blit-dst-after");
 }
 
 static enum isl_format
@@ -2007,6 +2017,11 @@ blorp_copy(struct blorp_batch *batch,
brw_blorp_surface_info_init(batch->blorp, , dst_surf, dst_level,
dst_layer, ISL_FORMAT_UNSUPPORTED, true);
 
+   if (isl_surface_debug_dump) {
+  blorp_surf_dump(batch->blorp, src_surf, "blorp-copy-src");
+  blorp_surf_dump(batch->blorp, dst_surf, "blorp-copy-dst-before");
+   }
+
struct brw_blorp_blit_prog_key wm_prog_key = {
   .shader_type = BLORP_SHADER_TYPE_BLIT
};
@@ -2080,4 +2095,7 @@ blorp_copy(struct blorp_batch *batch,
  src_x, src_y, src_x + src_width, src_y + src_height,
  dst_x, dst_y, dst_x + dst_width, dst_y + dst_height,
  false, false);
+
+   if (isl_surface_debug_dump)
+  blorp_surf_dump(batch->blorp, dst_surf, "blorp-copy-dst-after");
 }
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/5] i965/hiz: Add debug dump for depth/hiz buffers around hiz ops

2016-11-30 Thread Jordan Justen
v2:
 * Use new blorp_surf_dump function. (Jason)

Signed-off-by: Jordan Justen 
Acked-by: Matt Turner 
Cc: Jason Ekstrand 
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 54 +++
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 6e91084..2a48e0f 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -39,6 +39,8 @@
 
 #define FILE_DEBUG_FLAG DEBUG_BLORP
 
+static const bool isl_hiz_debug_dump = false;
+
 static void
 brw_blorp_map(const struct blorp_context *blorp,
   const struct blorp_address *blorp_addr,
@@ -1019,20 +1021,14 @@ brw_blorp_resolve_color(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
 }
 
 static void
-gen6_blorp_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
-unsigned int level, unsigned int layer, enum blorp_hiz_op 
op)
+hiz_dump_blorp_surf(const struct blorp_context *blorp,
+const struct blorp_surf *surf,
+const char *opname, const char *when)
 {
-   assert(intel_miptree_level_has_hiz(mt, level));
-
-   struct isl_surf isl_tmp[2];
-   struct blorp_surf surf;
-   blorp_surf_for_miptree(brw, &surf, mt, true, (1 << ISL_AUX_USAGE_HIZ),
-  &level, layer, 1, isl_tmp);
-
-   struct blorp_batch batch;
-   blorp_batch_init(&brw->blorp, &batch, brw, 0);
-   blorp_gen6_hiz_op(&batch, &surf, level, layer, op);
-   blorp_batch_finish(&batch);
+   char *basename = ralloc_asprintf(NULL, "%s-%s", opname, when);
+   assert(basename);
+   blorp_surf_dump(blorp, surf, basename);
+   ralloc_free(basename);
 }
 
 /**
@@ -1052,25 +1048,47 @@ intel_hiz_exec(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
 
switch (op) {
case BLORP_HIZ_OP_DEPTH_RESOLVE:
-  opname = "depth resolve";
+  opname = "depth-resolve";
   break;
case BLORP_HIZ_OP_HIZ_RESOLVE:
-  opname = "hiz ambiguate";
+  opname = "hiz-ambiguate";
   break;
case BLORP_HIZ_OP_DEPTH_CLEAR:
-  opname = "depth clear";
+  opname = "depth-clear";
   break;
case BLORP_HIZ_OP_NONE:
-  opname = "noop?";
+  opname = "hiz-noop";
   break;
}
 
DBG("%s %s to mt %p level %d layer %d\n",
__func__, opname, mt, level, layer);
 
+   assert(intel_miptree_level_has_hiz(mt, level));
+
+   struct isl_surf isl_tmp[2];
+   struct blorp_surf surf;
+   struct blorp_batch batch;
+   const bool need_blorp = brw->gen < 8 || isl_hiz_debug_dump;
+
+   if (need_blorp) {
+  blorp_surf_for_miptree(brw, &surf, mt, true, (1 << ISL_AUX_USAGE_HIZ),
+ &level, layer, 1, isl_tmp);
+  blorp_batch_init(&brw->blorp, &batch, brw, 0);
+   }
+
+   if (isl_hiz_debug_dump)
+  hiz_dump_blorp_surf(batch.blorp, &surf, opname, "before");
+
if (brw->gen >= 8) {
   gen8_hiz_exec(brw, mt, level, layer, op);
} else {
-  gen6_blorp_hiz_exec(brw, mt, level, layer, op);
+  blorp_gen6_hiz_op(&batch, &surf, level, layer, op);
}
+
+   if (isl_hiz_debug_dump)
+  hiz_dump_blorp_surf(batch.blorp, &surf, opname, "after");
+
+   if (need_blorp)
+  blorp_batch_finish(&batch);
 }
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/5] intel/isl: Add support for saving surface debug info to files

2016-11-30 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/intel/Makefile.sources |   1 +
 src/intel/isl/isl.h|  14 +++
 src/intel/isl/isl_dump.c   | 217 +
 3 files changed, 232 insertions(+)
 create mode 100644 src/intel/isl/isl_dump.c

diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index be6bdef..00bcaaf 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -34,6 +34,7 @@ AUBINATOR_GENERATED_FILES = \
 ISL_FILES = \
isl/isl.c \
isl/isl.h \
+   isl/isl_dump.c \
isl/isl_format.c \
isl/isl_priv.h \
isl/isl_storage_image.c
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index 07368f9..a1d431a 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -1486,6 +1486,20 @@ uint32_t
 isl_surf_get_depth_format(const struct isl_device *dev,
   const struct isl_surf *surf);
 
+/**
+ * @brief Save the isl_surf information out to files for debug purposes.
+ *
+ */
+void
+isl_surf_dump(const struct isl_device *dev,
+  const struct isl_surf *surf,
+  const void *map_addr,
+  unsigned int map_size,
+  const struct isl_surf *aux_surf,
+  const void *aux_map_addr,
+  unsigned int aux_map_size,
+  const char *basename);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/intel/isl/isl_dump.c b/src/intel/isl/isl_dump.c
new file mode 100644
index 000..17ec8c0
--- /dev/null
+++ b/src/intel/isl/isl_dump.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice (including the next
+ *  paragraph) shall be included in all copies or substantial portions of the
+ *  Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
DEALINGS
+ *  IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "isl.h"
+#include "isl_priv.h"
+#include "common/gen_device_info.h"
+
+#include "util/format_srgb.h"
+#include "util/ralloc.h"
+
+#include "main/macros.h"
+
+/**
+ * @brief Determine if a surface should be dumped.
+ *
+ * Since dumping a surface can produce a lot of data and be time consuming,
+ * this function allows you to filter whether a surface should actually be
+ * dumped. If the application is deterministic, then you can use the sequence
+ * id number to filter output. Other examples are shown commented out.
+ *
+ * Return true when the surface should be dumped.
+ */
+static inline bool
+filter_surface_dumping(uint64_t sequence_id,
+   const struct isl_surf *surf,
+   const void *map_addr,
+   unsigned int map_size,
+   const struct isl_surf *aux_surf,
+   const void *aux_map_addr,
+   unsigned int aux_map_size,
+   const char *basename)
+{
+   const uint64_t single_id = 0;
+   const uint64_t min_id = 0;
+   const uint64_t max_id = 0;
+   return
+  (min_id == 0 || sequence_id >= min_id) &&
+  (max_id == 0 || sequence_id <= max_id) &&
+  (single_id == 0 || sequence_id == single_id) &&
+  /* surf->format == ISL_FORMAT_R8_UINT && */
+  /* surf->msaa_layout == ISL_MSAA_LAYOUT_NONE && */
+  true;
+}
+
+static const char *
+tiling_name(enum isl_tiling tiling)
+{
+#define TILENAME(t) case ISL_TILING_##t: return #t
+   switch(tiling) {
+   TILENAME(LINEAR);
+   TILENAME(W);
+   TILENAME(X);
+   TILENAME(Y0);
+   TILENAME(Yf);
+   TILENAME(Ys);
+   TILENAME(HIZ);
+   TILENAME(CCS);
+   default:
+  return NULL;
+   }
+}
+
+static const char *
+msaa_name(enum isl_msaa_layout layout)
+{
+#define MSAA_NAME(l) case ISL_MSAA_LAYOUT_##l: return #l
+   switch(layout) {
+   MSAA_NAME(NONE);
+   MSAA_NAME(INTERLEAVED);
+   MSAA_NAME(ARRAY);
+   default:
+  return NULL;
+   }
+}
+
+static bool
+dump_surf_info(const struct isl_device *dev,
+   const struct isl_surf *surf,
+   const char *filename)
+{
+   FILE *f = fopen(filename, "w");
+   

[Mesa-dev] [PATCH v2 3/5] intel/blorp: Add blorp_surf_dump function

2016-11-30 Thread Jordan Justen
This function takes care of mapping and unmapping the buffers, and
then calls isl_surf_dump.

Signed-off-by: Jordan Justen 
---
 src/intel/blorp/blorp.c | 35 +++
 src/intel/blorp/blorp.h |  6 ++
 2 files changed, 41 insertions(+)

diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c
index 08ce97d..78aba03 100644
--- a/src/intel/blorp/blorp.c
+++ b/src/intel/blorp/blorp.c
@@ -279,3 +279,38 @@ blorp_gen6_hiz_op(struct blorp_batch *batch,
 
batch->blorp->exec(batch, );
 }
+
+void
+blorp_surf_dump(const struct blorp_context *blorp,
+const struct blorp_surf *surf,
+const char *basename)
+{
+   void *map, *aux_map;
+   unsigned int size, aux_size;
+   bool was_mapped, aux_was_mapped;
+
+   blorp->map(blorp, &surf->addr, &map, &size, &was_mapped);
+   if (map == NULL)
+  return;
+
+   if (surf->aux_addr.buffer) {
+  blorp->map(blorp, &surf->aux_addr, &aux_map, &aux_size, &aux_was_mapped);
+  if (aux_map == NULL) {
+ if (!was_mapped)
+blorp->unmap(blorp, &surf->addr);
+ return;
+  }
+   } else {
+  aux_map = NULL;
+  aux_size = 0;
+   }
+
+   isl_surf_dump(blorp->isl_dev, surf->surf, map, size,
+ aux_map ? surf->aux_surf : NULL, aux_map, aux_size,
+ basename);
+
+   if (!was_mapped)
+  blorp->unmap(blorp, &surf->addr);
+   if (surf->aux_addr.buffer && !aux_was_mapped)
+  blorp->unmap(blorp, &surf->aux_addr);
+}
diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 006d7ac..9f29d9b 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -39,6 +39,7 @@ extern "C" {
 struct blorp_batch;
 struct blorp_params;
 struct blorp_address;
+struct brw_blorp_surface_info;
 
 struct blorp_context {
void *driver_ctx;
@@ -210,6 +211,11 @@ blorp_gen6_hiz_op(struct blorp_batch *batch,
   struct blorp_surf *surf, unsigned level, unsigned layer,
   enum blorp_hiz_op op);
 
+void
+blorp_surf_dump(const struct blorp_context *blorp,
+const struct blorp_surf *surf,
+const char *basename);
+
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif /* __cplusplus */
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/5] intel/blorp: Support map/unmap of blorp_address

2016-11-30 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/intel/blorp/blorp.h   |  8 +++
 src/mesa/drivers/dri/i965/brw_blorp.c | 39 +++
 2 files changed, 47 insertions(+)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 823475b..006d7ac 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -38,6 +38,7 @@ extern "C" {
 
 struct blorp_batch;
 struct blorp_params;
+struct blorp_address;
 
 struct blorp_context {
void *driver_ctx;
@@ -52,6 +53,13 @@ struct blorp_context {
   uint32_t vb;
} mocs;
 
+   void (*map)(const struct blorp_context *blorp,
+   const struct blorp_address *blorp_addr,
+   void **addr,
+   unsigned int *map_size,
+   bool *mapped_previously);
+   void (*unmap)(const struct blorp_context *blorp,
+ const struct blorp_address *blorp_addr);
bool (*lookup_shader)(struct blorp_context *blorp,
  const void *key, uint32_t key_size,
  uint32_t *kernel_out, void *prog_data_out);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 4c1d858..6e91084 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -34,10 +34,47 @@
 #include "brw_meta_util.h"
 #include "brw_state.h"
 #include "intel_fbo.h"
+#include "intel_batchbuffer.h"
 #include "intel_debug.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BLORP
 
+static void
+brw_blorp_map(const struct blorp_context *blorp,
+  const struct blorp_address *blorp_addr,
+  void **addr,
+  unsigned int *size,
+  bool *mapped_previously)
+{
+   struct brw_context *brw = blorp->driver_ctx;
+   drm_intel_bo *bo = (drm_intel_bo *)blorp_addr->buffer;
+
+   *mapped_previously = bo->virtual != NULL;
+
+   if (!*mapped_previously) {
+  if (drm_intel_bo_references(brw->batch.bo, bo))
+ intel_batchbuffer_flush(brw);
+
+  int ret = drm_intel_bo_map(bo, /*write_enable*/ false);
+  if (ret == -1) {
+ assert(bo->virtual == NULL);
+ *addr = NULL;
+ return;
+  }
+   }
+
+   *addr = bo->virtual;
+   *size = bo->size;
+}
+
+static void
+brw_blorp_unmap(const struct blorp_context *blorp,
+const struct blorp_address *blorp_addr)
+{
+   drm_intel_bo *bo = (drm_intel_bo *)blorp_addr->buffer;
+   drm_intel_bo_unmap(bo);
+}
+
 static bool
 brw_blorp_lookup_shader(struct blorp_context *blorp,
 const void *key, uint32_t key_size,
@@ -102,6 +139,8 @@ brw_blorp_init(struct brw_context *brw)
   unreachable("Invalid gen");
}
 
+   brw->blorp.map = brw_blorp_map;
+   brw->blorp.unmap = brw_blorp_unmap;
brw->blorp.lookup_shader = brw_blorp_lookup_shader;
brw->blorp.upload_shader = brw_blorp_upload_shader;
 }
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: Add GL and GLSL plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Timothy Arceri
On Tue, 2016-11-22 at 23:53 +0200, Plamena Manolova wrote:
> This extension allows the fragment shader to control whether values
> in
> gl_SampleMaskIn[] reflect the coverage after application of the early
> depth and stencil tests.
> 
> Signed-off-by: Plamena Manolova 
> ---
>  src/compiler/glsl/ast.h  |  5 +
>  src/compiler/glsl/ast_to_hir.cpp |  5 +
>  src/compiler/glsl/ast_type.cpp   |  8 +++-
>  src/compiler/glsl/glsl_parser.yy | 11 +++
>  src/compiler/glsl/glsl_parser_extras.cpp |  4 
>  src/compiler/glsl/glsl_parser_extras.h   |  4 
>  src/compiler/glsl/linker.cpp |  4 
>  src/compiler/shader_info.h   |  1 +
>  src/mesa/main/extensions_table.h |  1 +
>  src/mesa/main/mtypes.h   |  2 ++
>  src/mesa/main/shaderapi.c|  1 +
>  11 files changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
> index 55f9a6c..ad19493 100644
> --- a/src/compiler/glsl/ast.h
> +++ b/src/compiler/glsl/ast.h
> @@ -606,6 +606,11 @@ struct ast_type_qualifier {
>   /** \{ */
>   unsigned blend_support:1; /**< Are there any blend_support_
> qualifiers */
>   /** \} */
> +
> + /**
> +  * Flag set if GL_ARB_post_depth_coverage layout qualifier
> is used.
> +  */
> + unsigned post_depth_coverage:1;
>    }
>    /** \brief Set of flags, accessed by name. */
>    q;
> diff --git a/src/compiler/glsl/ast_to_hir.cpp
> b/src/compiler/glsl/ast_to_hir.cpp
> index 9b8678c..c31da86 100644
> --- a/src/compiler/glsl/ast_to_hir.cpp
> +++ b/src/compiler/glsl/ast_to_hir.cpp
> @@ -3632,6 +3632,11 @@ apply_layout_qualifier_to_variable(const
> struct ast_type_qualifier *qual,
>    _mesa_glsl_error(loc, state, "early_fragment_tests layout
> qualifier only "
> "valid in fragment shader input layout
> declaration.");
> }
> +
> +   if (qual->flags.q.post_depth_coverage) {
> +  _mesa_glsl_error(loc, state, "post_depth_coverage layout
> qualifier only "
> +   "valid in fragment shader input layout
> declaration.");
> +   }
>  }
>  
>  static void
> diff --git a/src/compiler/glsl/ast_type.cpp
> b/src/compiler/glsl/ast_type.cpp
> index 2856f18..1905721 100644
> --- a/src/compiler/glsl/ast_type.cpp
> +++ b/src/compiler/glsl/ast_type.cpp
> @@ -489,6 +489,7 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE
> *loc,
>    break;
> case MESA_SHADER_FRAGMENT:
>    valid_in_mask.flags.q.early_fragment_tests = 1;
> +  valid_in_mask.flags.q.post_depth_coverage = 1;
>    break;
> case MESA_SHADER_COMPUTE:
>    create_cs_ast |=
> @@ -540,6 +541,10 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE
> *loc,
>    state->fs_early_fragment_tests = true;
> }
>  
> +   if (q.flags.q.post_depth_coverage) {
> +  state->fs_post_depth_coverage = true;
> +   }
> +
> if (this->flags.q.vertex_spacing) {
>    if (q.flags.q.vertex_spacing &&
>    this->vertex_spacing != q.vertex_spacing) {
> @@ -671,7 +676,8 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
>  bad.flags.q.point_mode ? " point_mode" : "",
>  bad.flags.q.vertices ? " vertices" : "",
>  bad.flags.q.subroutine ? " subroutine" : "",
> -bad.flags.q.subroutine_def ? " subroutine_def" :
> "");
> +bad.flags.q.subroutine_def ? " subroutine_def" :
> "",
> +bad.flags.q.post_depth_coverage ? "
> post_depth_coverage" : "");
> return false;
>  }
>  
> diff --git a/src/compiler/glsl/glsl_parser.yy
> b/src/compiler/glsl/glsl_parser.yy
> index a48dc68..a53f476 100644
> --- a/src/compiler/glsl/glsl_parser.yy
> +++ b/src/compiler/glsl/glsl_parser.yy
> @@ -1373,6 +1373,17 @@ layout_qualifier_id:
>  
>  $$.flags.q.early_fragment_tests = 1;
>   }
> +
> + if (!$$.flags.i &&
> + match_layout_qualifier($1, "post_depth_coverage",
> state) == 0) {
> +if (state->stage != MESA_SHADER_FRAGMENT) {
> +   _mesa_glsl_error(& @1, state,
> +"post_depth_coverage layout
> qualifier only "
> +"valid in fragment shaders");
> +}
> +
> +$$.flags.q.post_depth_coverage = 1;
> + }
>    }
>  
>    /* Layout qualifiers for tessellation evaluation shaders. */
> diff --git a/src/compiler/glsl/glsl_parser_extras.cpp
> b/src/compiler/glsl/glsl_parser_extras.cpp
> index 85a2e94..bc252a0 100644
> --- a/src/compiler/glsl/glsl_parser_extras.cpp
> +++ b/src/compiler/glsl/glsl_parser_extras.cpp
> @@ -295,6 +295,7 @@
> _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context
> *_ctx,
> this->in_qualifier = new(this) ast_type_qualifier();
> this->out_qualifier = new(this) 

[Mesa-dev] [Bug 97524] Samplers referring to the same texture unit with different types should raise GL_INVALID_OPERATION

2016-11-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97524

--- Comment #16 from Marek Olšák  ---
Workaround for radeonsi:

CI-VI:
If the resource type and instruction mismatch (e.g. a buffer constant with an
image instruction, or an image resource with a buffer instruction), the
instruction will be ignored (reads return nothing and writes do not alter
memory).
Solution: Move the buffer descriptor to dwords [0:3] or [8:11] of the sampler
slot (the image or fmask portion, respectively).
Note that on Southern Islands, this condition causes a hang.

SI:
Move the buffer descriptor to dwords [8:11] of the sampler slot. This will only
cause a hang when a buffer is used as sampler2DMS* and vice versa, because
dwords[8:11] are unused by other texture types.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Add i965 plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Ilia Mirkin
On Wed, Nov 30, 2016 at 3:00 PM, Plamena Manolova
 wrote:
> This extension allows the fragment shader to control whether values in
> gl_SampleMaskIn[] reflect the coverage after application of the early
> depth and stencil tests.
>
> Signed-off-by: Plamena Manolova 
> ---
>  docs/relnotes/13.1.0.html|  1 +

Also docs/features.txt :)

[I think the patch looks fine otherwise, but you should wait for an
Intel person to review it for real.]

>  src/mesa/drivers/dri/i965/brw_compiler.h |  1 +
>  src/mesa/drivers/dri/i965/brw_fs.cpp |  1 +
>  src/mesa/drivers/dri/i965/gen8_ps_state.c| 13 ++---
>  src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
>  5 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/docs/relnotes/13.1.0.html b/docs/relnotes/13.1.0.html
> index 4f76cc2..a160cda 100644
> --- a/docs/relnotes/13.1.0.html
> +++ b/docs/relnotes/13.1.0.html
> @@ -45,6 +45,7 @@ Note: some of the new features are only available with 
> certain drivers.
>
>  
>  GL_NV_image_formats on any driver supporting 
> GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)
> +GL_ARB_post_depth_coverage on i965/gen9+

This is generally a sorted list. Should go first.

>  
>
>  Bug fixes
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
> b/src/mesa/drivers/dri/i965/brw_compiler.h
> index 65a7478..410641f 100644
> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> @@ -397,6 +397,7 @@ struct brw_wm_prog_data {
> bool computed_stencil;
>
> bool early_fragment_tests;
> +   bool post_depth_coverage;
> bool dispatch_8;
> bool dispatch_16;
> bool dual_src_blend;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index c218f56..ce0c07e 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -6454,6 +6454,7 @@ brw_compile_fs(const struct brw_compiler *compiler, 
> void *log_data,
> shader->info->outputs_read);
>
> prog_data->early_fragment_tests = shader->info->fs.early_fragment_tests;
> +   prog_data->post_depth_coverage = shader->info->fs.post_depth_coverage;
>
> prog_data->barycentric_interp_modes =
>brw_compute_barycentric_interp_modes(compiler->devinfo, shader);
> diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c 
> b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> index a4eb962..33ef023 100644
> --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> @@ -53,10 +53,17 @@ gen8_upload_ps_extra(struct brw_context *brw,
>dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
>
> if (prog_data->uses_sample_mask) {
> -  if (brw->gen >= 9)
> - dw1 |= BRW_PSICMS_INNER << 
> GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
> -  else
> +  if (brw->gen >= 9) {
> + if (prog_data->post_depth_coverage) {
> +dw1 |= BRW_PCICMS_DEPTH << 
> GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
> + }
> + else {
> +dw1 |= BRW_PSICMS_INNER << 
> GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
> + }
> +  }
> +  else {
>   dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
> +  }
> }
>
> if (prog_data->uses_omask)
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> index 66079b5..19f4684 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -415,6 +415,7 @@ intelInitExtensions(struct gl_context *ctx)
>ctx->Extensions.KHR_texture_compression_astc_ldr = true;
>ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true;
>ctx->Extensions.MESA_shader_framebuffer_fetch = true;
> +  ctx->Extensions.ARB_post_depth_coverage = true;
> }
>
> if (ctx->API == API_OPENGL_CORE)
> --
> 2.7.4
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: keep gl_program shader info in sync after gather info

2016-11-30 Thread Timothy Arceri
It's possible that nir_shader was cloned and it no longer contains
a pointer to the shader_info in gl_program. So we need to copy
shader_info back to gl_program if that is the case.

Fixes a regression with NIR_TEST_CLONE=true

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98840
---
 src/mesa/drivers/dri/i965/brw_program.c | 11 ++-
 src/mesa/drivers/dri/i965/brw_program.h |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index a502b8e..7f69977 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -64,7 +64,7 @@ brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
 nir_shader *
 brw_create_nir(struct brw_context *brw,
const struct gl_shader_program *shader_prog,
-   const struct gl_program *prog,
+   struct gl_program *prog,
gl_shader_stage stage,
bool is_scalar)
 {
@@ -107,6 +107,15 @@ brw_create_nir(struct brw_context *brw,
 
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 
+   /* nir_shader may have been cloned so make sure shader_info is in sync */
+   if (nir->info != &prog->info) {
+  const char *name = prog->info.name;
+  const char *label = prog->info.label;
+  prog->info = *nir->info;
+  prog->info.name = name;
+  prog->info.label = label;
+   }
+
if (shader_prog) {
   NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
   NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
diff --git a/src/mesa/drivers/dri/i965/brw_program.h 
b/src/mesa/drivers/dri/i965/brw_program.h
index 43bc625..6eda165 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -34,7 +34,7 @@ struct brw_context;
 
 struct nir_shader *brw_create_nir(struct brw_context *brw,
   const struct gl_shader_program *shader_prog,
-  const struct gl_program *prog,
+  struct gl_program *prog,
   gl_shader_stage stage,
   bool is_scalar);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: only verify that enabled arrays have backing buffers

2016-11-30 Thread Ilia Mirkin
On Wed, Nov 30, 2016 at 5:04 PM, Ilia Mirkin  wrote:
> We were previously also verifying that no backing buffers were available
> when an array wasn't enabled. This has no basis in the spec, and it
> causes GLupeN64 to fail as a result.
>
> Fixes: c2e146f487 ("mesa: error out in indirect draw when vertex bindings 
> mismatch")
> Cc: mesa-sta...@lists.freedesktop.org
> Signed-off-by: Ilia Mirkin 
> Reviewed-by: Timothy Arceri 
> ---

Ran this through Intel's CI, doesn't appear to regress (or fix) anything.

>  src/mesa/main/api_validate.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
> index d3b4cab..071c16d 100644
> --- a/src/mesa/main/api_validate.c
> +++ b/src/mesa/main/api_validate.c
> @@ -925,7 +925,7 @@ valid_draw_indirect(struct gl_context *ctx,
>  * buffer bound.
>  */
> if (_mesa_is_gles31(ctx) &&
> -   ctx->Array.VAO->_Enabled != ctx->Array.VAO->VertexAttribBufferMask) {
> +   ctx->Array.VAO->_Enabled & ~ctx->Array.VAO->VertexAttribBufferMask) {
>_mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name);
>return GL_FALSE;
> }
> --
> 2.7.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: Add GL and GLSL plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Ilia Mirkin
On Wed, Nov 30, 2016 at 2:53 PM, Plamena Manolova
 wrote:
> This extension allows the fragment shader to control whether values in
> gl_SampleMaskIn[] reflect the coverage after application of the early
> depth and stencil tests.
>
> Signed-off-by: Plamena Manolova 
> ---
>  src/compiler/glsl/ast.h  |  5 +
>  src/compiler/glsl/ast_to_hir.cpp |  5 +
>  src/compiler/glsl/ast_type.cpp   |  9 -
>  src/compiler/glsl/glsl_parser.yy | 18 ++
>  src/compiler/glsl/glsl_parser_extras.cpp |  4 
>  src/compiler/glsl/glsl_parser_extras.h   |  4 
>  src/compiler/glsl/linker.cpp |  4 
>  src/compiler/shader_info.h   |  1 +
>  src/mesa/main/extensions_table.h |  1 +
>  src/mesa/main/mtypes.h   |  2 ++
>  src/mesa/main/shaderapi.c|  1 +
>  11 files changed, 53 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
> index afe91ea..df3a744 100644
> --- a/src/compiler/glsl/ast.h
> +++ b/src/compiler/glsl/ast.h
> @@ -605,6 +605,11 @@ struct ast_type_qualifier {
>   /** \{ */
>   unsigned blend_support:1; /**< Are there any blend_support_ 
> qualifiers */
>   /** \} */
> +
> + /**
> +  * Flag set if GL_ARB_post_depth_coverage layout qualifier is used.
> +  */
> + unsigned post_depth_coverage:1;
>}
>/** \brief Set of flags, accessed by name. */
>q;
> diff --git a/src/compiler/glsl/ast_to_hir.cpp 
> b/src/compiler/glsl/ast_to_hir.cpp
> index c2ce389..2434ce5 100644
> --- a/src/compiler/glsl/ast_to_hir.cpp
> +++ b/src/compiler/glsl/ast_to_hir.cpp
> @@ -3632,6 +3632,11 @@ apply_layout_qualifier_to_variable(const struct 
> ast_type_qualifier *qual,
>_mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier 
> only "
> "valid in fragment shader input layout declaration.");
> }
> +
> +   if (qual->flags.q.post_depth_coverage) {
> +  _mesa_glsl_error(loc, state, "post_depth_coverage layout qualifier 
> only "
> +   "valid in fragment shader input layout declaration.");
> +   }
>  }
>
>  static void
> diff --git a/src/compiler/glsl/ast_type.cpp b/src/compiler/glsl/ast_type.cpp
> index 3431e24..aa1ae7e 100644
> --- a/src/compiler/glsl/ast_type.cpp
> +++ b/src/compiler/glsl/ast_type.cpp
> @@ -579,6 +579,7 @@ ast_type_qualifier::validate_in_qualifier(YYLTYPE *loc,
>break;
> case MESA_SHADER_FRAGMENT:
>valid_in_mask.flags.q.early_fragment_tests = 1;
> +  valid_in_mask.flags.q.post_depth_coverage = 1;
>break;
> case MESA_SHADER_COMPUTE:
>valid_in_mask.flags.q.local_size = 7;
> @@ -633,6 +634,11 @@ ast_type_qualifier::merge_into_in_qualifier(YYLTYPE *loc,
>state->in_qualifier->flags.q.early_fragment_tests = false;
> }
>
> +   if (state->in_qualifier->flags.q.post_depth_coverage) {
> +  state->fs_post_depth_coverage = true;
> +  state->in_qualifier->flags.q.post_depth_coverage = false;
> +   }
> +
> /* We allow the creation of multiple cs_input_layout nodes. Coherence 
> among
>  * all existing nodes is checked later, when the AST node is transformed
>  * into HIR.
> @@ -761,7 +767,8 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
>  bad.flags.q.point_mode ? " point_mode" : "",
>  bad.flags.q.vertices ? " vertices" : "",
>  bad.flags.q.subroutine ? " subroutine" : "",
> -bad.flags.q.subroutine_def ? " subroutine_def" : "");
> +bad.flags.q.subroutine_def ? " subroutine_def" : "",
> +bad.flags.q.post_depth_coverage ? " post_depth_coverage" 
> : "");
> return false;
>  }
>
> diff --git a/src/compiler/glsl/glsl_parser.yy 
> b/src/compiler/glsl/glsl_parser.yy
> index 0c3781c..09b7e79 100644
> --- a/src/compiler/glsl/glsl_parser.yy
> +++ b/src/compiler/glsl/glsl_parser.yy
> @@ -1392,6 +1392,24 @@ layout_qualifier_id:
>
>  $$.flags.q.early_fragment_tests = 1;
>   }
> +
> + if (!$$.flags.i &&
> + match_layout_qualifier($1, "post_depth_coverage", state) == 0) {
> +if (state->stage != MESA_SHADER_FRAGMENT) {
> +   _mesa_glsl_error(& @1, state,
> +"post_depth_coverage layout qualifier only "
> +"valid in fragment shaders");
> +}
> +
> +if (state->ARB_post_depth_coverage_enable) {
> +   $$.flags.q.post_depth_coverage = 1;
> +} else {
> +   _mesa_glsl_error(& @1, state,
> +"post_depth_coverage layout qualifier 
> present, "
> +"but the GL_ARB_post_depth_coverage 
> extension "
> +"is not 

Re: [Mesa-dev] [PATCH 1/2] mesa: Add GL and GLSL plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Chris Forbes
Excellent, disregard that. Patch looks good.

On Thu, Dec 1, 2016 at 3:10 PM, Ilia Mirkin  wrote:

> On Wed, Nov 30, 2016 at 9:10 PM, Chris Forbes  wrote:
> > This patch misses adding the #define to the GLSL preprocessor. Other than
>
> The future is today. That's no longer necessary :)
>
> > that it looks good though, so with that fixed:
> >
> > Reviewed-by: Chris Forbes 
> >
> > On Thu, Dec 1, 2016 at 8:53 AM, Plamena Manolova
> >  wrote:
> >>
> >> This extension allows the fragment shader to control whether values in
> >> gl_SampleMaskIn[] reflect the coverage after application of the early
> >> depth and stencil tests.
> >>
> >> Signed-off-by: Plamena Manolova 
> >> ---
> >>  src/compiler/glsl/ast.h  |  5 +
> >>  src/compiler/glsl/ast_to_hir.cpp |  5 +
> >>  src/compiler/glsl/ast_type.cpp   |  9 -
> >>  src/compiler/glsl/glsl_parser.yy | 18 ++
> >>  src/compiler/glsl/glsl_parser_extras.cpp |  4 
> >>  src/compiler/glsl/glsl_parser_extras.h   |  4 
> >>  src/compiler/glsl/linker.cpp |  4 
> >>  src/compiler/shader_info.h   |  1 +
> >>  src/mesa/main/extensions_table.h |  1 +
> >>  src/mesa/main/mtypes.h   |  2 ++
> >>  src/mesa/main/shaderapi.c|  1 +
> >>  11 files changed, 53 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
> >> index afe91ea..df3a744 100644
> >> --- a/src/compiler/glsl/ast.h
> >> +++ b/src/compiler/glsl/ast.h
> >> @@ -605,6 +605,11 @@ struct ast_type_qualifier {
> >>   /** \{ */
> >>   unsigned blend_support:1; /**< Are there any blend_support_
> >> qualifiers */
> >>   /** \} */
> >> +
> >> + /**
> >> +  * Flag set if GL_ARB_post_depth_coverage layout qualifier is
> >> used.
> >> +  */
> >> + unsigned post_depth_coverage:1;
> >>}
> >>/** \brief Set of flags, accessed by name. */
> >>q;
> >> diff --git a/src/compiler/glsl/ast_to_hir.cpp
> >> b/src/compiler/glsl/ast_to_hir.cpp
> >> index c2ce389..2434ce5 100644
> >> --- a/src/compiler/glsl/ast_to_hir.cpp
> >> +++ b/src/compiler/glsl/ast_to_hir.cpp
> >> @@ -3632,6 +3632,11 @@ apply_layout_qualifier_to_variable(const struct
> >> ast_type_qualifier *qual,
> >>_mesa_glsl_error(loc, state, "early_fragment_tests layout
> qualifier
> >> only "
> >> "valid in fragment shader input layout
> >> declaration.");
> >> }
> >> +
> >> +   if (qual->flags.q.post_depth_coverage) {
> >> +  _mesa_glsl_error(loc, state, "post_depth_coverage layout
> qualifier
> >> only "
> >> +   "valid in fragment shader input layout
> >> declaration.");
> >> +   }
> >>  }
> >>
> >>  static void
> >> diff --git a/src/compiler/glsl/ast_type.cpp
> >> b/src/compiler/glsl/ast_type.cpp
> >> index 3431e24..aa1ae7e 100644
> >> --- a/src/compiler/glsl/ast_type.cpp
> >> +++ b/src/compiler/glsl/ast_type.cpp
> >> @@ -579,6 +579,7 @@ ast_type_qualifier::validate_in_qualifier(YYLTYPE
> >> *loc,
> >>break;
> >> case MESA_SHADER_FRAGMENT:
> >>valid_in_mask.flags.q.early_fragment_tests = 1;
> >> +  valid_in_mask.flags.q.post_depth_coverage = 1;
> >>break;
> >> case MESA_SHADER_COMPUTE:
> >>valid_in_mask.flags.q.local_size = 7;
> >> @@ -633,6 +634,11 @@ ast_type_qualifier::merge_
> into_in_qualifier(YYLTYPE
> >> *loc,
> >>state->in_qualifier->flags.q.early_fragment_tests = false;
> >> }
> >>
> >> +   if (state->in_qualifier->flags.q.post_depth_coverage) {
> >> +  state->fs_post_depth_coverage = true;
> >> +  state->in_qualifier->flags.q.post_depth_coverage = false;
> >> +   }
> >> +
> >> /* We allow the creation of multiple cs_input_layout nodes.
> Coherence
> >> among
> >>  * all existing nodes is checked later, when the AST node is
> >> transformed
> >>  * into HIR.
> >> @@ -761,7 +767,8 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
> >>  bad.flags.q.point_mode ? " point_mode" : "",
> >>  bad.flags.q.vertices ? " vertices" : "",
> >>  bad.flags.q.subroutine ? " subroutine" : "",
> >> -bad.flags.q.subroutine_def ? " subroutine_def" :
> "");
> >> +bad.flags.q.subroutine_def ? " subroutine_def" :
> "",
> >> +bad.flags.q.post_depth_coverage ? "
> >> post_depth_coverage" : "");
> >> return false;
> >>  }
> >>
> >> diff --git a/src/compiler/glsl/glsl_parser.yy
> >> b/src/compiler/glsl/glsl_parser.yy
> >> index 0c3781c..09b7e79 100644
> >> --- a/src/compiler/glsl/glsl_parser.yy
> >> +++ b/src/compiler/glsl/glsl_parser.yy
> >> @@ -1392,6 +1392,24 @@ layout_qualifier_id:
> >>
> >>  $$.flags.q.early_fragment_tests = 1;
> >>   

Re: [Mesa-dev] [PATCH 1/2] mesa: Add GL and GLSL plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Ilia Mirkin
On Wed, Nov 30, 2016 at 9:10 PM, Chris Forbes  wrote:
> This patch misses adding the #define to the GLSL preprocessor. Other than

The future is today. That's no longer necessary :)

> that it looks good though, so with that fixed:
>
> Reviewed-by: Chris Forbes 
>
> On Thu, Dec 1, 2016 at 8:53 AM, Plamena Manolova
>  wrote:
>>
>> This extension allows the fragment shader to control whether values in
>> gl_SampleMaskIn[] reflect the coverage after application of the early
>> depth and stencil tests.
>>
>> Signed-off-by: Plamena Manolova 
>> ---
>>  src/compiler/glsl/ast.h  |  5 +
>>  src/compiler/glsl/ast_to_hir.cpp |  5 +
>>  src/compiler/glsl/ast_type.cpp   |  9 -
>>  src/compiler/glsl/glsl_parser.yy | 18 ++
>>  src/compiler/glsl/glsl_parser_extras.cpp |  4 
>>  src/compiler/glsl/glsl_parser_extras.h   |  4 
>>  src/compiler/glsl/linker.cpp |  4 
>>  src/compiler/shader_info.h   |  1 +
>>  src/mesa/main/extensions_table.h |  1 +
>>  src/mesa/main/mtypes.h   |  2 ++
>>  src/mesa/main/shaderapi.c|  1 +
>>  11 files changed, 53 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
>> index afe91ea..df3a744 100644
>> --- a/src/compiler/glsl/ast.h
>> +++ b/src/compiler/glsl/ast.h
>> @@ -605,6 +605,11 @@ struct ast_type_qualifier {
>>   /** \{ */
>>   unsigned blend_support:1; /**< Are there any blend_support_
>> qualifiers */
>>   /** \} */
>> +
>> + /**
>> +  * Flag set if GL_ARB_post_depth_coverage layout qualifier is
>> used.
>> +  */
>> + unsigned post_depth_coverage:1;
>>}
>>/** \brief Set of flags, accessed by name. */
>>q;
>> diff --git a/src/compiler/glsl/ast_to_hir.cpp
>> b/src/compiler/glsl/ast_to_hir.cpp
>> index c2ce389..2434ce5 100644
>> --- a/src/compiler/glsl/ast_to_hir.cpp
>> +++ b/src/compiler/glsl/ast_to_hir.cpp
>> @@ -3632,6 +3632,11 @@ apply_layout_qualifier_to_variable(const struct
>> ast_type_qualifier *qual,
>>_mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier
>> only "
>> "valid in fragment shader input layout
>> declaration.");
>> }
>> +
>> +   if (qual->flags.q.post_depth_coverage) {
>> +  _mesa_glsl_error(loc, state, "post_depth_coverage layout qualifier
>> only "
>> +   "valid in fragment shader input layout
>> declaration.");
>> +   }
>>  }
>>
>>  static void
>> diff --git a/src/compiler/glsl/ast_type.cpp
>> b/src/compiler/glsl/ast_type.cpp
>> index 3431e24..aa1ae7e 100644
>> --- a/src/compiler/glsl/ast_type.cpp
>> +++ b/src/compiler/glsl/ast_type.cpp
>> @@ -579,6 +579,7 @@ ast_type_qualifier::validate_in_qualifier(YYLTYPE
>> *loc,
>>break;
>> case MESA_SHADER_FRAGMENT:
>>valid_in_mask.flags.q.early_fragment_tests = 1;
>> +  valid_in_mask.flags.q.post_depth_coverage = 1;
>>break;
>> case MESA_SHADER_COMPUTE:
>>valid_in_mask.flags.q.local_size = 7;
>> @@ -633,6 +634,11 @@ ast_type_qualifier::merge_into_in_qualifier(YYLTYPE
>> *loc,
>>state->in_qualifier->flags.q.early_fragment_tests = false;
>> }
>>
>> +   if (state->in_qualifier->flags.q.post_depth_coverage) {
>> +  state->fs_post_depth_coverage = true;
>> +  state->in_qualifier->flags.q.post_depth_coverage = false;
>> +   }
>> +
>> /* We allow the creation of multiple cs_input_layout nodes. Coherence
>> among
>>  * all existing nodes is checked later, when the AST node is
>> transformed
>>  * into HIR.
>> @@ -761,7 +767,8 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
>>  bad.flags.q.point_mode ? " point_mode" : "",
>>  bad.flags.q.vertices ? " vertices" : "",
>>  bad.flags.q.subroutine ? " subroutine" : "",
>> -bad.flags.q.subroutine_def ? " subroutine_def" : "");
>> +bad.flags.q.subroutine_def ? " subroutine_def" : "",
>> +bad.flags.q.post_depth_coverage ? "
>> post_depth_coverage" : "");
>> return false;
>>  }
>>
>> diff --git a/src/compiler/glsl/glsl_parser.yy
>> b/src/compiler/glsl/glsl_parser.yy
>> index 0c3781c..09b7e79 100644
>> --- a/src/compiler/glsl/glsl_parser.yy
>> +++ b/src/compiler/glsl/glsl_parser.yy
>> @@ -1392,6 +1392,24 @@ layout_qualifier_id:
>>
>>  $$.flags.q.early_fragment_tests = 1;
>>   }
>> +
>> + if (!$$.flags.i &&
>> + match_layout_qualifier($1, "post_depth_coverage", state) ==
>> 0) {
>> +if (state->stage != MESA_SHADER_FRAGMENT) {
>> +   _mesa_glsl_error(& @1, state,
>> +"post_depth_coverage layout qualifier
>> only "
>> +"valid in 

Re: [Mesa-dev] [PATCH 1/2] mesa: Add GL and GLSL plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Chris Forbes
This patch misses adding the #define to the GLSL preprocessor. Other than
that it looks good though, so with that fixed:

Reviewed-by: Chris Forbes 

On Thu, Dec 1, 2016 at 8:53 AM, Plamena Manolova  wrote:

> This extension allows the fragment shader to control whether values in
> gl_SampleMaskIn[] reflect the coverage after application of the early
> depth and stencil tests.
>
> Signed-off-by: Plamena Manolova 
> ---
>  src/compiler/glsl/ast.h  |  5 +
>  src/compiler/glsl/ast_to_hir.cpp |  5 +
>  src/compiler/glsl/ast_type.cpp   |  9 -
>  src/compiler/glsl/glsl_parser.yy | 18 ++
>  src/compiler/glsl/glsl_parser_extras.cpp |  4 
>  src/compiler/glsl/glsl_parser_extras.h   |  4 
>  src/compiler/glsl/linker.cpp |  4 
>  src/compiler/shader_info.h   |  1 +
>  src/mesa/main/extensions_table.h |  1 +
>  src/mesa/main/mtypes.h   |  2 ++
>  src/mesa/main/shaderapi.c|  1 +
>  11 files changed, 53 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
> index afe91ea..df3a744 100644
> --- a/src/compiler/glsl/ast.h
> +++ b/src/compiler/glsl/ast.h
> @@ -605,6 +605,11 @@ struct ast_type_qualifier {
>   /** \{ */
>   unsigned blend_support:1; /**< Are there any blend_support_
> qualifiers */
>   /** \} */
> +
> + /**
> +  * Flag set if GL_ARB_post_depth_coverage layout qualifier is
> used.
> +  */
> + unsigned post_depth_coverage:1;
>}
>/** \brief Set of flags, accessed by name. */
>q;
> diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_
> hir.cpp
> index c2ce389..2434ce5 100644
> --- a/src/compiler/glsl/ast_to_hir.cpp
> +++ b/src/compiler/glsl/ast_to_hir.cpp
> @@ -3632,6 +3632,11 @@ apply_layout_qualifier_to_variable(const struct
> ast_type_qualifier *qual,
>_mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier
> only "
> "valid in fragment shader input layout
> declaration.");
> }
> +
> +   if (qual->flags.q.post_depth_coverage) {
> +  _mesa_glsl_error(loc, state, "post_depth_coverage layout qualifier
> only "
> +   "valid in fragment shader input layout
> declaration.");
> +   }
>  }
>
>  static void
> diff --git a/src/compiler/glsl/ast_type.cpp b/src/compiler/glsl/ast_type.
> cpp
> index 3431e24..aa1ae7e 100644
> --- a/src/compiler/glsl/ast_type.cpp
> +++ b/src/compiler/glsl/ast_type.cpp
> @@ -579,6 +579,7 @@ ast_type_qualifier::validate_in_qualifier(YYLTYPE
> *loc,
>break;
> case MESA_SHADER_FRAGMENT:
>valid_in_mask.flags.q.early_fragment_tests = 1;
> +  valid_in_mask.flags.q.post_depth_coverage = 1;
>break;
> case MESA_SHADER_COMPUTE:
>valid_in_mask.flags.q.local_size = 7;
> @@ -633,6 +634,11 @@ ast_type_qualifier::merge_into_in_qualifier(YYLTYPE
> *loc,
>state->in_qualifier->flags.q.early_fragment_tests = false;
> }
>
> +   if (state->in_qualifier->flags.q.post_depth_coverage) {
> +  state->fs_post_depth_coverage = true;
> +  state->in_qualifier->flags.q.post_depth_coverage = false;
> +   }
> +
> /* We allow the creation of multiple cs_input_layout nodes. Coherence
> among
>  * all existing nodes is checked later, when the AST node is
> transformed
>  * into HIR.
> @@ -761,7 +767,8 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
>  bad.flags.q.point_mode ? " point_mode" : "",
>  bad.flags.q.vertices ? " vertices" : "",
>  bad.flags.q.subroutine ? " subroutine" : "",
> -bad.flags.q.subroutine_def ? " subroutine_def" : "");
> +bad.flags.q.subroutine_def ? " subroutine_def" : "",
> +bad.flags.q.post_depth_coverage ? "
> post_depth_coverage" : "");
> return false;
>  }
>
> diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_
> parser.yy
> index 0c3781c..09b7e79 100644
> --- a/src/compiler/glsl/glsl_parser.yy
> +++ b/src/compiler/glsl/glsl_parser.yy
> @@ -1392,6 +1392,24 @@ layout_qualifier_id:
>
>  $$.flags.q.early_fragment_tests = 1;
>   }
> +
> + if (!$$.flags.i &&
> + match_layout_qualifier($1, "post_depth_coverage", state) ==
> 0) {
> +if (state->stage != MESA_SHADER_FRAGMENT) {
> +   _mesa_glsl_error(& @1, state,
> +"post_depth_coverage layout qualifier
> only "
> +"valid in fragment shaders");
> +}
> +
> +if (state->ARB_post_depth_coverage_enable) {
> +   $$.flags.q.post_depth_coverage = 1;
> +} else {
> +   _mesa_glsl_error(& @1, state,
> +   

[Mesa-dev] [AppVeyor] mesa master #2856 completed

2016-11-30 Thread AppVeyor


Build mesa 2856 completed



Commit fda1d0187d by Ilia Mirkin on 11/30/2016 5:56 AM:

anv: expose support for VK_KHR_sampler_mirror_clamp_to_edge\n\nThis is already supported in genX_state.c, expose the extension string.\n\nSigned-off-by: Ilia Mirkin \nReviewed-by: Jason Ekstrand 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/cmd_buffer: Actually use the stencil dimension

2016-11-30 Thread Jason Ekstrand
On Wed, Nov 30, 2016 at 10:20 AM, Nanley Chery 
wrote:

> On Tue, Nov 29, 2016 at 05:41:58PM -0800, Jason Ekstrand wrote:
> > In an attempt to fix 3DSTATE_DEPTH_BUFFER for stencil-only cases, I
> > accidentally kept setting the SurfaceType to 2D in the stencil-only case
> > thanks to a copy+paste error.
> >
> > Cc: Nanley Chery 
> > ---
> >  src/intel/vulkan/genX_cmd_buffer.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
>
> Thanks for the fix! This patch is,
> Reviewed-by: Nanley Chery 
>

Thanks!  Pushed.  Sorry for the mess.


> >
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> > index 73f4523..f761d9a 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -2153,7 +2153,7 @@ cmd_buffer_emit_depth_stencil(struct
> anv_cmd_buffer *cmd_buffer)
> > */
> >anv_batch_emit(_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
> db) {
> >   if (has_stencil) {
> > -db.SurfaceType   = SURFTYPE_2D;
> > +db.SurfaceType   =
> > depth_stencil_surface_type(image->stencil_surface.isl.
> dim);
> >   } else {
> >  db.SurfaceType   = SURFTYPE_2D;
> > --
> > 2.5.0.400.gff86faf
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #2855 failed

2016-11-30 Thread AppVeyor



Build mesa 2855 failed


Commit 27433b26b1 by Jason Ekstrand on 11/29/2016 10:00 PM:

anv/cmd_buffer: Actually use the stencil dimension\n\nIn an attempt to fix 3DSTATE_DEPTH_BUFFER for stencil-only cases, I\naccidentally kept setting the SurfaceType to 2D in the stencil-only case\nthanks to a copy+paste error.\n\nReviewed-by: Nanley Chery 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 98911] [Ironlake Mobile] Severe Artifacting in Minecraft and weird "screen leftovers" when recording desktop with latest Mesa

2016-11-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98911

Kenneth Graunke  changed:

   What|Removed |Added

  Component|Mesa core   |Drivers/DRI/i965
   Assignee|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org
 QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org

--- Comment #2 from Kenneth Graunke  ---
This looks familiar.  Does running with INTEL_DEBUG=no16 fix the problem?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa 13.1.0 release plan

2016-11-30 Thread Jason Ekstrand
Weren't we going to call it 17.0?...

On Nov 30, 2016 12:23 PM, "Emil Velikov"  wrote:

> Hi all,
>
> With holidays not far off, it might be a nice idea to consider the
> branchpoint/release schedule for the next release.
>
> I will be having limited internet access during 20 Dec - 7 Jan, thus
> I'm leaning towards the following:
>  Jan 13 2017 - Feature freeze/Release candidate 1
>  Jan 20 2017 - Release candidate 2
>  Jan 27 2017 - Release candidate 3
>  Feb 03 2017 - Release candidate 4/final release
>
> How does this align with people's schedules ?
>
> Please let me know if you have any work we want to land before the
> next branchpoint.
>
> Thanks
> Emil
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #2852 failed

2016-11-30 Thread AppVeyor



Build mesa 2852 failed


Commit 966567aa12 by Timothy Arceri on 11/30/2016 9:52 PM:

mesa: reset linked_stages bitmask when re-linking\n\n34953f8907fdd added this bitmask but it wasn't being reset when\na program was relinked. If a stage was removed from the new\nprogram then it could case a crash as we expect the linked shader\nfor that stage to not be null.\n\nFixes crashes in:\nESEXT-CTS.tessellation_shader.single.xfb_captures_data_from_correct_stage\nES31-CTS.core.tessellation_shader.single.xfb_captures_data_from_correct_stage\n\nReviewed-by: Kenneth Graunke \nReviewed-by: Dave Airlie \nBugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98917


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ac/nir: Only use the first component for SSBO atomics.

2016-11-30 Thread Bas Nieuwenhuizen
Signed-off-by: Bas Nieuwenhuizen 
---
 src/amd/common/ac_nir_to_llvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index df121d8..87bb19a 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1952,9 +1952,9 @@ static LLVMValueRef visit_atomic_ssbo(struct 
nir_to_llvm_context *ctx,
ctx->shader_info->fs.writes_memory = true;
 
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
-   params[arg_count++] = get_src(ctx, instr->src[3]);
+   params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, 
instr->src[3]), 0);
}
-   params[arg_count++] = get_src(ctx, instr->src[2]);
+   params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, 
instr->src[2]), 0);
params[arg_count++] = get_src(ctx, instr->src[0]);
params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
params[arg_count++] = get_src(ctx, instr->src[1]);  /* voffset */
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] swr: add streamout buffer offset into pBuffer pointer

2016-11-30 Thread Rowley, Timothy O
Reviewed-by: Tim Rowley 
>

On Nov 29, 2016, at 8:23 PM, Ilia Mirkin 
> wrote:

The buffer_size does not take the offset into account. Just add the
offset into the pointer which lines up the structures much better.

Signed-off-by: Ilia Mirkin >
---

This doesn't really fix anything right now, but logically the streamOffset
is incremented on each draw, and is optionally written back out as a watermark
indicator (for pausing/resuming streams). So it should be relative to the
logical start of the buffer.

src/gallium/drivers/swr/swr_state.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index fc835dc..4475252 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1488,10 +1488,11 @@ swr_update_derived(struct pipe_context *pipe,
continue;
 buffer.enable = true;
 buffer.pBuffer =
-(uint32_t *)swr_resource_data(ctx->so_targets[i]->buffer);
+(uint32_t *)(swr_resource_data(ctx->so_targets[i]->buffer) +
+ ctx->so_targets[i]->buffer_offset);
 buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
 buffer.pitch = stream_output->stride[i];
- buffer.streamOffset = ctx->so_targets[i]->buffer_offset >> 2;
+ buffer.streamOffset = 0;

 SwrSetSoBuffers(ctx->swrContext, , i);
  }
--
2.7.3


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] swr: turn off queries around blits

2016-11-30 Thread Rowley, Timothy O
Reviewed-by: Tim Rowley 
>

On Nov 29, 2016, at 8:23 PM, Ilia Mirkin 
> wrote:

Signed-off-by: Ilia Mirkin >
---
src/gallium/drivers/swr/swr_context.cpp | 10 +-
1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index b355bba..b8c87fa 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -301,7 +301,10 @@ swr_blit(struct pipe_context *pipe, const struct 
pipe_blit_info *blit_info)
  return;
   }

-   /* XXX turn off occlusion and streamout queries */
+   if (ctx->active_queries) {
+  SwrEnableStatsFE(ctx->swrContext, FALSE);
+  SwrEnableStatsBE(ctx->swrContext, FALSE);
+   }

   util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer);
   util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems);
@@ -335,6 +338,11 @@ swr_blit(struct pipe_context *pipe, const struct 
pipe_blit_info *blit_info)
  ctx->render_cond_mode);

   util_blitter_blit(ctx->blitter, );
+
+   if (ctx->active_queries) {
+  SwrEnableStatsFE(ctx->swrContext, TRUE);
+  SwrEnableStatsBE(ctx->swrContext, TRUE);
+   }
}


--
2.7.3


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] swr: fix assertion for max number of so targets

2016-11-30 Thread Rowley, Timothy O
Reviewed-by: Tim Rowley 
>

On Nov 29, 2016, at 8:23 PM, Ilia Mirkin 
> wrote:

The number has to be less than or equal to the max, not just less than.

Signed-off-by: Ilia Mirkin >
---
src/gallium/drivers/swr/swr_state.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index 9f6b5b0..fc835dc 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1570,7 +1570,7 @@ swr_set_so_targets(struct pipe_context *pipe,
   struct swr_context *swr = swr_context(pipe);
   uint32_t i;

-   assert(num_targets < MAX_SO_STREAMS);
+   assert(num_targets <= MAX_SO_STREAMS);

   for (i = 0; i < num_targets; i++) {
  pipe_so_target_reference(
--
2.7.3


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/5] swr: properly report max number of SO components

2016-11-30 Thread Rowley, Timothy O
Reviewed-by: Tim Rowley 
>

On Nov 29, 2016, at 8:23 PM, Ilia Mirkin 
> wrote:

The components count the number of individual values, not the number of
slots.

Signed-off-by: Ilia Mirkin >
---
src/gallium/drivers/swr/swr_screen.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index e184548..2388922 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -166,7 +166,7 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
  return MAX_SO_STREAMS;
   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
-  return MAX_ATTRIBUTES;
+  return MAX_ATTRIBUTES * 4;
   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
  return 1024;
--
2.7.3


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: only verify that enabled arrays have backing buffers

2016-11-30 Thread Ilia Mirkin
We were previously also verifying that no backing buffers were available
when an array wasn't enabled. This has no basis in the spec, and it
causes GLupeN64 to fail as a result.

Fixes: c2e146f487 ("mesa: error out in indirect draw when vertex bindings 
mismatch")
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Ilia Mirkin 
Reviewed-by: Timothy Arceri 
---
 src/mesa/main/api_validate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index d3b4cab..071c16d 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -925,7 +925,7 @@ valid_draw_indirect(struct gl_context *ctx,
 * buffer bound.
 */
if (_mesa_is_gles31(ctx) &&
-   ctx->Array.VAO->_Enabled != ctx->Array.VAO->VertexAttribBufferMask) {
+   ctx->Array.VAO->_Enabled & ~ctx->Array.VAO->VertexAttribBufferMask) {
   _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name);
   return GL_FALSE;
}
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: reset linked_stages bitmask when re-linking

2016-11-30 Thread Timothy Arceri
34953f8907fdd added this bitmask but it wasn't being reset when
a program was relinked. If a stage was removed from the new
program then it could cause a crash as we expect the linked shader
for that stage to not be null.

Fixes crashes in:
ESEXT-CTS.tessellation_shader.single.xfb_captures_data_from_correct_stage
ES31-CTS.core.tessellation_shader.single.xfb_captures_data_from_correct_stage

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98917
---
 src/mesa/main/shaderobj.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index 998656a..6fcdf44 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -348,6 +348,8 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
   }
}
 
+   shProg->data->linked_stages = 0;
+
if (shProg->data->UniformStorage) {
   for (unsigned i = 0; i < shProg->data->NumUniformStorage; ++i)
  _mesa_uniform_detach_all_driver_storage(>data->
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/50] GL_ARB_gpu_shader_int64... this time for sure!

2016-11-30 Thread Matt Turner

On 11/28, Ian Romanick wrote:

From: Ian Romanick 

I believe that I have addressed all of the review feedback from the
previous iteration.  Many of the patches have been reviewed, and they
should be ready to go.

Patches marked with "vN" in the subject have changed in a non-trivial
way since last being sent to the list.

Several patches that have not changed need review:

   Patches 23 through 33 add lowering passes for 64-bit operations.


I sent a few comments, and I cannot claim to have verified the division
routine, but the rest are

Reviewed-by: Matt Turner 

Feel free to put an Acked-by on the division routine (patch 29).


   Patches 42 through 50 enable the extension on BDW+.


I'll look at these next.


signature.asc
Description: Digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] swr: don't advertise stream pause/resume

2016-11-30 Thread Rowley, Timothy O
Reviewed-by: Tim Rowley 
>

On Nov 29, 2016, at 8:23 PM, Ilia Mirkin 
> wrote:

There is no support for resuming streamout. Furthermore, this also
controls glDrawTransformFeedback functionality which requires the same
ability to query how many primitives were sent out of TF.

Signed-off-by: Ilia Mirkin >
---

I have a partially-working patch for bringing this back, but it's not quite
100% yet - some sort of concurrency issue I have yet to track down.

However in the current state, this is just totally not supported by the FE
(but the swr core does do this).

src/gallium/drivers/swr/swr_screen.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 19bb102..e184548 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -232,7 +232,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
   case PIPE_CAP_USER_VERTEX_BUFFERS:
   case PIPE_CAP_USER_INDEX_BUFFERS:
   case PIPE_CAP_USER_CONSTANT_BUFFERS:
-   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
   case PIPE_CAP_QUERY_TIMESTAMP:
   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
@@ -311,6 +310,7 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
   case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
   case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
   case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
+   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
  return 0;

   case PIPE_CAP_VENDOR_ID:
--
2.7.3


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: remove warning about multi-layer surfaces

2016-11-30 Thread Rowley, Timothy O
Reviewed-by: Tim Rowley 
>

On Nov 29, 2016, at 8:05 PM, Ilia Mirkin 
> wrote:

We now support clearing these, and actually rendering to multiple layers
would require GS support, which will fail in much more spectacular ways
for now. Once that is hooked up, there won't be anything else to do
here.

Signed-off-by: Ilia Mirkin >
---
src/gallium/drivers/swr/swr_context.cpp | 4 
1 file changed, 4 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index 5a1927c..b355bba 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -62,10 +62,6 @@ swr_create_surface(struct pipe_context *pipe,
 ps->u.tex.level = surf_tmpl->u.tex.level;
 ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
 ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
- if (ps->u.tex.first_layer != ps->u.tex.last_layer) {
-debug_printf("creating surface with multiple layers, rendering "
- "to first layer only\n");
- }
  } else {
 /* setting width as number of elements should get us correct
  * renderbuffer width */
--
2.7.3


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: [rasterizer core] don't attempt to load another RTAI when storing

2016-11-30 Thread Rowley, Timothy O
Reviewed-by: Tim Rowley 
>

On Nov 16, 2016, at 9:04 PM, Ilia Mirkin 
> wrote:

Since we don't pass a renderTargetArrayIndex in, and the current hot
tile may be for a different index, we may end up loading the RTAI=0 into
the hot tile for no reason.

Signed-off-by: Ilia Mirkin >
---

Noticed this when doing an audit of GetHotTile calls without a 
renderTargetArrayIndex being passed in. In this case, I don't think it should 
be loading at all...

Note that this has not been rigorously tested.

src/gallium/drivers/swr/rasterizer/core/backend.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp 
b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 3375585..29d0ff5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -361,7 +361,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t macroTile
MacroTileMgr::getTileIndices(macroTile, x, y);

// Only need to store the hottile if it's been rendered to...
-HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, 
macroTile, attachment, false);
+HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTileNoLoad(pContext, pDC, 
macroTile, attachment, false);
if (pHotTile)
{
// clear if clear is pending (i.e., not rendered to), then mark as 
dirty for store.
--
2.7.3


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/23] tgsi: add Stream{X, Y, Z, W} fields to tgsi_declaration_semantic

2016-11-30 Thread Roland Scheidegger
Am 30.11.2016 um 20:19 schrieb Nicolai Hähnle:
> On 30.11.2016 19:06, Roland Scheidegger wrote:
>> Am 30.11.2016 um 14:35 schrieb Nicolai Hähnle:
>>> From: Nicolai Hähnle 
>>>
>>> This is for geometry shader outputs. Without it, drivers have no way of
>>> knowing which stream each output is intended for, and have to
>>> conservatively write all outputs to all streams.
>>>
>>> Separate stream numbers for each component are required due to output
>>> packing.
>> Are you sure this is true?
>> This is an area I don't know much about, but
>> https://www.opengl.org/wiki/Layout_Qualifier_(GLSL)
>>  
>> tells me "Stream
>> assignments for a geometry shader are required to be the same for all
>> members of a block, but offsets are not."
>>
>> Therefore I don't think output packing should ever happen across
>> multiple streams. I think it would be MUCH nicer if the semantic needed
>> just one stream member...
> 
> There are two variants of that question, I guess.
> 
> The answer to the first variant is: Yes, this is currently true.
> lower_packed_varyings will happily pack outputs from different vertex
> streams into the same vec4. This affects quite a lot of programs, e.g.
> you see it in piglit arb_gpu_shader5-xfb-streams.
> 
> The second question is: Do we want it to be true? I agree that it would
> be convenient to be able to use a single Stream member. Also, isolating
> the stream0 components from the rest would lead to slightly more
> efficient shaders for us in some cases.
> 
> I opted against it so far because I didn't want to think through the
> implications of changing lower_packed_varyings. The main question I have
> is: if you account for the size of the GS output in # of components,
> then it could happen that the number of output vec4s ends up being
> larger than (max # of output components) / 4. Will that be a problem
> somewhere?

I don't know if that would be a problem, but if it is I'd assume this
would be fixable (since the number of actual components ultimately
doesn't change).
Having outputs belonging to multiple streams in a single output just
seems weird...
That said, I wonder if it actually would be possible to do that with
d3d11 too.
With shader model 5 you'd have:
dcl_stream 0
dcl_output o0.xy
dcl_stream 1
dcl_output o0.zw // legal or not???

Though the shader model 4/5 rules are a bit weird for packing
inputs/outputs, I'm not even sure two dcl_output are legal for the same
reg without a dcl_stream in between them (but you can pack system values
together with ordinary inputs/outputs).

So maybe just allowing this is the right solution...

Roland





> 
> Nicolai
> 
>>
>> Roland
>>
>>
>>
>>> ---
>>>  src/compiler/glsl/ir_print_visitor.cpp |  4 +--
>>>  src/gallium/auxiliary/tgsi/tgsi_build.c| 18 +--
>>>  src/gallium/auxiliary/tgsi/tgsi_dump.c | 13 
>>>  src/gallium/auxiliary/tgsi/tgsi_text.c | 48
>>> ++
>>>  src/gallium/include/pipe/p_shader_tokens.h |  5 +++-
>>>  5 files changed, 83 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/src/compiler/glsl/ir_print_visitor.cpp
>>> b/src/compiler/glsl/ir_print_visitor.cpp
>>> index 2b77c14..d401426 100644
>>> --- a/src/compiler/glsl/ir_print_visitor.cpp
>>> +++ b/src/compiler/glsl/ir_print_visitor.cpp
>>> @@ -173,26 +173,26 @@ void ir_print_visitor::visit(ir_variable *ir)
>>> if (ir->data.location != -1)
>>>snprintf(loc, sizeof(loc), "location=%i ", ir->data.location);
>>>
>>> char component[32] = {0};
>>> if (ir->data.explicit_component)
>>>snprintf(component, sizeof(component), "component=%i ",
>>> ir->data.location_frac);
>>>
>>> char stream[32] = {0};
>>> if (ir->data.stream & (1u << 31)) {
>>>if (ir->data.stream & ~(1u << 31)) {
>>> - snprintf(stream, sizeof(stream), "stream(%u,%u,%u,%u)",
>>> + snprintf(stream, sizeof(stream), "stream(%u,%u,%u,%u) ",
>>>ir->data.stream & 3, (ir->data.stream >> 2) & 3,
>>>(ir->data.stream >> 4) & 3, (ir->data.stream >> 6)
>>> & 3);
>>>}
>>> } else if (ir->data.stream) {
>>> -  snprintf(stream, sizeof(stream), "stream%u", ir->data.stream);
>>> +  snprintf(stream, sizeof(stream), "stream%u ", ir->data.stream);
>>> }
>>>
>>> const char *const cent = (ir->data.centroid) ? "centroid " : "";
>>> const char *const samp = (ir->data.sample) ? "sample " : "";
>>> const char *const patc = (ir->data.patch) ? "patch " : "";
>>> const char *const inv = (ir->data.invariant) ? "invariant " : "";
>>> const char *const prec = (ir->data.precise) ? "precise " : "";
>>> const char *const mode[] = { "", "uniform ", "shader_storage ",
>>>  "shader_shared ", 

Re: [Mesa-dev] [PATCH] swr: [rasterizer memory] assert when trying to convert an unknown format

2016-11-30 Thread Rowley, Timothy O
Not seeing this assert fire on our tests either.

Reviewed-by: Tim Rowley 
>

On Nov 29, 2016, at 8:04 PM, Ilia Mirkin 
> wrote:

Signed-off-by: Ilia Mirkin >
---

I've been running this for a little while and haven't hit it. I had a theory
at one point that there was a missing format in there which turned out to be
false, but I think this is still good to have rather than silently fail.

src/gallium/drivers/swr/rasterizer/memory/Convert.h | 1 +
1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/swr/rasterizer/memory/Convert.h 
b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
index c31459c..527324c 100644
--- a/src/gallium/drivers/swr/rasterizer/memory/Convert.h
+++ b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
@@ -724,6 +724,7 @@ INLINE static void ConvertPixelFromFloat(
case R8G8B8_SINT: ConvertPixelFromFloat(pDst, srcPixel); break;
case RAW: ConvertPixelFromFloat(pDst, srcPixel); break;
default:
+SWR_ASSERT(0);
break;
}
}
--
2.7.3


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 27/50] glsl: Add "built-in" function for 64-bit integer sign()

2016-11-30 Thread Matt Turner

On 11/28, Ian Romanick wrote:

From: Ian Romanick 

These functions are directly available in shaders.  A #define is added
to detect the presence.  This allows these functions to be tested using
piglit regardless of whether the driver uses them for lowering.  The
GLSL spec says that functions and macros beginning with __ are reserved
for use by the implementation... hey, that's us!

Signed-off-by: Ian Romanick 
---
src/compiler/glsl/builtin_functions.cpp |   4 +
src/compiler/glsl/builtin_functions.h   |   3 +
src/compiler/glsl/builtin_int64.h   |  26 
src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
src/compiler/glsl/int64.glsl|  11 ++
src/compiler/glsl/udivmod64.h   | 206 
6 files changed, 251 insertions(+)
create mode 100644 src/compiler/glsl/udivmod64.h


This is supposed to be in a later patch.


signature.asc
Description: Digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 26/50] glsl: Add a lowering pass for 64-bit integer multiplication

2016-11-30 Thread Matt Turner

On 11/28, Ian Romanick wrote:

From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
src/compiler/Makefile.glsl.am|   1 +
src/compiler/Makefile.sources|   1 +
src/compiler/glsl/ir_optimization.h  |   6 +
src/compiler/glsl/lower_64bit.cpp| 374 +++
src/compiler/glsl/tests/lower_64bit_test.cpp | 440 +++
5 files changed, 822 insertions(+)
create mode 100644 src/compiler/glsl/lower_64bit.cpp
create mode 100644 src/compiler/glsl/tests/lower_64bit_test.cpp

diff --git a/src/compiler/Makefile.glsl.am b/src/compiler/Makefile.glsl.am
index 5b1d74e..017ffc7 100644
--- a/src/compiler/Makefile.glsl.am
+++ b/src/compiler/Makefile.glsl.am
@@ -72,6 +72,7 @@ glsl_tests_general_ir_test_SOURCES =  \
glsl/tests/builtin_variable_test.cpp\
glsl/tests/invalidate_locations_test.cpp\
glsl/tests/general_ir_test.cpp  \
+   glsl/tests/lower_64bit_test.cpp \
glsl/tests/opt_add_neg_to_sub_test.cpp  \
glsl/tests/varyings_test.cpp
glsl_tests_general_ir_test_CFLAGS = \
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 31c4f6a..35e93a3 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -80,6 +80,7 @@ LIBGLSL_FILES = \
glsl/loop_analysis.h \
glsl/loop_controls.cpp \
glsl/loop_unroll.cpp \
+   glsl/lower_64bit.cpp \
glsl/lower_blend_equation_advanced.cpp \
glsl/lower_buffer_access.cpp \
glsl/lower_buffer_access.h \
diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 0d6c4e6..6fc7ff8 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -50,6 +50,9 @@
#define FIND_MSB_TO_FLOAT_CAST0x4
#define IMUL_HIGH_TO_MUL  0x8

+/* Operations for lower_64bit_integer_instructions() */
+#define MUL64 (1U << 0)
+
/**
 * \see class lower_packing_builtins_visitor
 */
@@ -162,3 +165,6 @@ void propagate_invariance(exec_list *instructions);
ir_rvalue *
compare_index_block(exec_list *instructions, ir_variable *index,
unsigned base, unsigned components, void *mem_ctx);
+
+bool lower_64bit_integer_instructions(exec_list *instructions,
+  unsigned what_to_lower);
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
new file mode 100644
index 000..ccf80cab
--- /dev/null
+++ b/src/compiler/glsl/lower_64bit.cpp


I might call this lower_int64.cpp since 64bit might imply fp64.


@@ -0,0 +1,374 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_64bit.cpp
+ *
+ * Lower 64-bit operations to 32-bit operations.  Each 64-bit value is lowered
+ * to a uvec2.  For each operation that can be lowered, there is a function
+ * called __builtin_foo with the same number of parameters that takes uvec2
+ * sources and produces uvec2 results.  An operation like
+ *
+ * uint64_t(x) * uint64_t(y)
+ *
+ * becomes
+ *
+ * packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
+ */
+
+#include "main/macros.h"
+#include "compiler/glsl_types.h"
+#include "ir.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_builder.h"
+#include "ir_optimization.h"
+#include "util/hash_table.h"
+#include "builtin_functions.h"
+
+typedef ir_function_signature *(*function_generator)(void *mem_ctx,
+ 
builtin_available_predicate avail);
+
+using namespace ir_builder;
+
+namespace lower_64bit {
+void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);
+
+ir_dereference_variable 

[Mesa-dev] Mesa 13.1.0 release plan

2016-11-30 Thread Emil Velikov
Hi all,

With holidays not far off, it might be a nice idea to consider the
branchpoint/release schedule for the next release.

I will be having limited internet access during 20 Dec - 7 Jan, thus
I'm leaning towards the following:
 Jan 13 2017 - Feature freeze/Release candidate 1
 Jan 20 2017 - Release candidate 2
 Jan 27 2017 - Release candidate 3
 Feb 03 2017 - Release candidate 4/final release

How does this align with people's schedules ?

Please let me know if you have any work we want to land before the
next branchpoint.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 25/50] glsl: Add "built-in" functions to do 64x64 => 64 multiplication

2016-11-30 Thread Matt Turner

On 11/28, Ian Romanick wrote:

From: Ian Romanick 

These functions are directly available in shaders.  A #define is added
to detect the presence.  This allows these functions to be tested using
piglit regardless of whether the driver uses them for lowering.  The
GLSL spec says that functions and macros beginning with __ are reserved
for use by the implementation... hey, that's us!

Signed-off-by: Ian Romanick 
---
src/compiler/Makefile.sources   |  2 ++
src/compiler/glsl/builtin_functions.cpp |  9 +
src/compiler/glsl/builtin_functions.h   |  7 +++
src/compiler/glsl/builtin_int64.h   | 30 ++
src/compiler/glsl/generate_ir.cpp   | 33 +
src/compiler/glsl/glcpp/glcpp-parse.y   | 14 +-
src/compiler/glsl/glcpp/glcpp.h |  4 +++-
src/compiler/glsl/glcpp/pp.c|  2 +-
src/compiler/glsl/int64.glsl| 19 +++
9 files changed, 117 insertions(+), 3 deletions(-)
create mode 100644 src/compiler/glsl/builtin_int64.h
create mode 100644 src/compiler/glsl/generate_ir.cpp
create mode 100644 src/compiler/glsl/int64.glsl

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 51d7285..31c4f6a 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -21,8 +21,10 @@ LIBGLSL_FILES = \
glsl/blob.h \
glsl/builtin_functions.cpp \
glsl/builtin_functions.h \
+   glsl/builtin_int64.h \
glsl/builtin_types.cpp \
glsl/builtin_variables.cpp \
+   glsl/generate_ir.cpp \
glsl/glsl_parser_extras.cpp \
glsl/glsl_parser_extras.h \
glsl/glsl_symbol_table.cpp \
diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index 66c0a74..4f1a874 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -576,6 +576,11 @@ vote(const _mesa_glsl_parse_state *state)
   return state->ARB_shader_group_vote_enable;
}

+static bool
+integer_functions_supported(const _mesa_glsl_parse_state *state)
+{
+   return state->extensions->MESA_shader_integer_functions;
+}
/** @} */

/**/
@@ -3103,6 +3108,10 @@ builtin_builder::create_builtins()
   add_function("allInvocationsARB", _vote(ir_unop_vote_all), NULL);
   add_function("allInvocationsEqualARB", _vote(ir_unop_vote_eq), NULL);

+   add_function("__builtin_umul64",
+generate_ir::umul64(mem_ctx, integer_functions_supported),
+NULL);
+
#undef F
#undef FI
#undef FIUD
diff --git a/src/compiler/glsl/builtin_functions.h 
b/src/compiler/glsl/builtin_functions.h
index 747b4fb..a79fb97 100644
--- a/src/compiler/glsl/builtin_functions.h
+++ b/src/compiler/glsl/builtin_functions.h
@@ -43,4 +43,11 @@ _mesa_get_main_function_signature(glsl_symbol_table 
*symbols);
extern void
_mesa_glsl_release_builtin_functions(void);

+namespace generate_ir {
+
+ir_function_signature *
+umul64(void *mem_ctx, builtin_available_predicate avail);
+
+}
+
#endif /* BULITIN_FUNCTIONS_H */
diff --git a/src/compiler/glsl/builtin_int64.h 
b/src/compiler/glsl/builtin_int64.h
new file mode 100644
index 000..108da08
--- /dev/null
+++ b/src/compiler/glsl/builtin_int64.h
@@ -0,0 +1,30 @@
+ir_function_signature *
+umul64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0001 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0001);
+   ir_variable *const r0002 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"b", ir_var_function_in);
+   sig_parameters.push_tail(r0002);
+   ir_variable *const r0003 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"result", ir_var_auto);
+   body.emit(r0003);
+   body.emit(assign(r0003, imul_high(swizzle_x(r0001), swizzle_x(r0002)), 
0x02));
+
+   body.emit(assign(r0003, mul(swizzle_x(r0001), swizzle_x(r0002)), 0x01));
+
+   ir_expression *const r0004 = mul(swizzle_x(r0001), swizzle_y(r0002));
+   ir_expression *const r0005 = mul(swizzle_y(r0001), swizzle_x(r0002));
+   ir_expression *const r0006 = add(r0004, r0005);
+   body.emit(assign(r0003, add(swizzle_y(r0003), r0006), 0x02));
+
+   body.emit(ret(r0003));
+
+   sig->replace_parameters(_parameters);
+   return sig;
+}
diff --git a/src/compiler/glsl/generate_ir.cpp 
b/src/compiler/glsl/generate_ir.cpp
new file mode 100644
index 000..255b048
--- /dev/null
+++ b/src/compiler/glsl/generate_ir.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated 

[Mesa-dev] [PATCH 2/2] i965: Add i965 plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Plamena Manolova
This extension allows the fragment shader to control whether values in
gl_SampleMaskIn[] reflect the coverage after application of the early
depth and stencil tests.

Signed-off-by: Plamena Manolova 
---
 docs/relnotes/13.1.0.html|  1 +
 src/mesa/drivers/dri/i965/brw_compiler.h |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp |  1 +
 src/mesa/drivers/dri/i965/gen8_ps_state.c| 13 ++---
 src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
 5 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/docs/relnotes/13.1.0.html b/docs/relnotes/13.1.0.html
index 4f76cc2..a160cda 100644
--- a/docs/relnotes/13.1.0.html
+++ b/docs/relnotes/13.1.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 GL_NV_image_formats on any driver supporting 
GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)
+GL_ARB_post_depth_coverage on i965/gen9+
 
 
 Bug fixes
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
b/src/mesa/drivers/dri/i965/brw_compiler.h
index 65a7478..410641f 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -397,6 +397,7 @@ struct brw_wm_prog_data {
bool computed_stencil;
 
bool early_fragment_tests;
+   bool post_depth_coverage;
bool dispatch_8;
bool dispatch_16;
bool dual_src_blend;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c218f56..ce0c07e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6454,6 +6454,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
shader->info->outputs_read);
 
prog_data->early_fragment_tests = shader->info->fs.early_fragment_tests;
+   prog_data->post_depth_coverage = shader->info->fs.post_depth_coverage;
 
prog_data->barycentric_interp_modes =
   brw_compute_barycentric_interp_modes(compiler->devinfo, shader);
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c 
b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index a4eb962..33ef023 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -53,10 +53,17 @@ gen8_upload_ps_extra(struct brw_context *brw,
   dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
 
if (prog_data->uses_sample_mask) {
-  if (brw->gen >= 9)
- dw1 |= BRW_PSICMS_INNER << GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
-  else
+  if (brw->gen >= 9) {
+ if (prog_data->post_depth_coverage) {
+dw1 |= BRW_PCICMS_DEPTH << 
GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
+ }
+ else {
+dw1 |= BRW_PSICMS_INNER << 
GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
+ }
+  }
+  else {
  dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
+  }
}
 
if (prog_data->uses_omask)
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 66079b5..19f4684 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -415,6 +415,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.KHR_texture_compression_astc_ldr = true;
   ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true;
   ctx->Extensions.MESA_shader_framebuffer_fetch = true;
+  ctx->Extensions.ARB_post_depth_coverage = true;
}
 
if (ctx->API == API_OPENGL_CORE)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] mesa: Add GL and GLSL plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Plamena Manolova
This extension allows the fragment shader to control whether values in
gl_SampleMaskIn[] reflect the coverage after application of the early
depth and stencil tests.

Signed-off-by: Plamena Manolova 
---
 src/compiler/glsl/ast.h  |  5 +
 src/compiler/glsl/ast_to_hir.cpp |  5 +
 src/compiler/glsl/ast_type.cpp   |  9 -
 src/compiler/glsl/glsl_parser.yy | 18 ++
 src/compiler/glsl/glsl_parser_extras.cpp |  4 
 src/compiler/glsl/glsl_parser_extras.h   |  4 
 src/compiler/glsl/linker.cpp |  4 
 src/compiler/shader_info.h   |  1 +
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/mtypes.h   |  2 ++
 src/mesa/main/shaderapi.c|  1 +
 11 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index afe91ea..df3a744 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -605,6 +605,11 @@ struct ast_type_qualifier {
  /** \{ */
  unsigned blend_support:1; /**< Are there any blend_support_ 
qualifiers */
  /** \} */
+
+ /**
+  * Flag set if GL_ARB_post_depth_coverage layout qualifier is used.
+  */
+ unsigned post_depth_coverage:1;
   }
   /** \brief Set of flags, accessed by name. */
   q;
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index c2ce389..2434ce5 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -3632,6 +3632,11 @@ apply_layout_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
   _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only 
"
"valid in fragment shader input layout declaration.");
}
+
+   if (qual->flags.q.post_depth_coverage) {
+  _mesa_glsl_error(loc, state, "post_depth_coverage layout qualifier only "
+   "valid in fragment shader input layout declaration.");
+   }
 }
 
 static void
diff --git a/src/compiler/glsl/ast_type.cpp b/src/compiler/glsl/ast_type.cpp
index 3431e24..aa1ae7e 100644
--- a/src/compiler/glsl/ast_type.cpp
+++ b/src/compiler/glsl/ast_type.cpp
@@ -579,6 +579,7 @@ ast_type_qualifier::validate_in_qualifier(YYLTYPE *loc,
   break;
case MESA_SHADER_FRAGMENT:
   valid_in_mask.flags.q.early_fragment_tests = 1;
+  valid_in_mask.flags.q.post_depth_coverage = 1;
   break;
case MESA_SHADER_COMPUTE:
   valid_in_mask.flags.q.local_size = 7;
@@ -633,6 +634,11 @@ ast_type_qualifier::merge_into_in_qualifier(YYLTYPE *loc,
   state->in_qualifier->flags.q.early_fragment_tests = false;
}
 
+   if (state->in_qualifier->flags.q.post_depth_coverage) {
+  state->fs_post_depth_coverage = true;
+  state->in_qualifier->flags.q.post_depth_coverage = false;
+   }
+
/* We allow the creation of multiple cs_input_layout nodes. Coherence among
 * all existing nodes is checked later, when the AST node is transformed
 * into HIR.
@@ -761,7 +767,8 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
 bad.flags.q.point_mode ? " point_mode" : "",
 bad.flags.q.vertices ? " vertices" : "",
 bad.flags.q.subroutine ? " subroutine" : "",
-bad.flags.q.subroutine_def ? " subroutine_def" : "");
+bad.flags.q.subroutine_def ? " subroutine_def" : "",
+bad.flags.q.post_depth_coverage ? " post_depth_coverage" : 
"");
return false;
 }
 
diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy
index 0c3781c..09b7e79 100644
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -1392,6 +1392,24 @@ layout_qualifier_id:
 
 $$.flags.q.early_fragment_tests = 1;
  }
+
+ if (!$$.flags.i &&
+ match_layout_qualifier($1, "post_depth_coverage", state) == 0) {
+if (state->stage != MESA_SHADER_FRAGMENT) {
+   _mesa_glsl_error(& @1, state,
+"post_depth_coverage layout qualifier only "
+"valid in fragment shaders");
+}
+
+if (state->ARB_post_depth_coverage_enable) {
+   $$.flags.q.post_depth_coverage = 1;
+} else {
+   _mesa_glsl_error(& @1, state,
+"post_depth_coverage layout qualifier present, 
"
+"but the GL_ARB_post_depth_coverage extension "
+"is not enabled.");
+}
+ }
   }
 
   /* Layout qualifiers for tessellation evaluation shaders. */
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 1e0298e..d1fc98d 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ 

Re: [Mesa-dev] [PATCH 23/50] glsl/standalone: Enable ARB_gpu_shader_int64

2016-11-30 Thread Matt Turner

On 11/28, Ian Romanick wrote:

From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
src/compiler/glsl/ir_builder_print_visitor.cpp | 16 +---
src/compiler/glsl/standalone_scaffolding.cpp   |  1 +
2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/compiler/glsl/ir_builder_print_visitor.cpp 
b/src/compiler/glsl/ir_builder_print_visitor.cpp
index 91a73c9..a3a5fe4 100644
--- a/src/compiler/glsl/ir_builder_print_visitor.cpp
+++ b/src/compiler/glsl/ir_builder_print_visitor.cpp
@@ -396,13 +396,23 @@ ir_builder_print_visitor::visit(ir_constant *ir)

memcpy(, >value.d[i], sizeof(v));
if (v != 0)
-   /* FIXME: This won't actually work until ARB_gpu_shader_int64
-* support lands.
-*/
   print_without_indent("r%04X_data.u64[%u] = 0x%016" PRIx64 "; /* %g 
*/\n",
my_index, i, v, ir->value.d[i]);
break;
 }
+ case GLSL_TYPE_UINT64:
+if (ir->value.u64[i] != 0)
+   print_without_indent("r%04X_data.u64[%u] = %" PRIu64 ";\n",
+my_index,
+i,
+ir->value.u64[i]);
+break;
+ case GLSL_TYPE_INT64:
+if (ir->value.i64[i] != 0)
+   print_without_indent("r%04X_data.i64[%u] = %" PRId64 ";\n",
+my_index,
+i,
+ir->value.i64[i]);


Missing break. With that fixed,

Reviewed-by: Matt Turner 


signature.asc
Description: Digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 98428] Undefined non-weak-symbol in dri-drivers

2016-11-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98428

Jonathan Dieter  changed:

   What|Removed |Added

 CC||jdie...@lesbg.com

--- Comment #12 from Jonathan Dieter  ---
Created attachment 128291
  --> https://bugs.freedesktop.org/attachment.cgi?id=128291=edit
Updated patch that applies to both mesa and gallium drivers

I'm not quite sure what the final consensus is on this, but I'm trying to get a
multiseat system working with some seats using the Intel and AMD open drivers
and another using NVIDIA's binary driver.  I've gotten it working, but it
required the attached patch (built on kwizart's original patch), so just
thought I'd submit it.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/23] tgsi: add Stream{X, Y, Z, W} fields to tgsi_declaration_semantic

2016-11-30 Thread Nicolai Hähnle

On 30.11.2016 19:06, Roland Scheidegger wrote:

Am 30.11.2016 um 14:35 schrieb Nicolai Hähnle:

From: Nicolai Hähnle 

This is for geometry shader outputs. Without it, drivers have no way of
knowing which stream each output is intended for, and have to
conservatively write all outputs to all streams.

Separate stream numbers for each component are required due to output
packing.

Are you sure this is true?
This is an area I don't know much about, but
https://www.opengl.org/wiki/Layout_Qualifier_(GLSL) tells me "Stream
assignments for a geometry shader are required to be the same for all
members of a block, but offsets are not."

Therefore I don't think output packing should ever happen across
multiple streams. I think it would be MUCH nicer if the semantic needed
just one stream member...


There are two variants of that question, I guess.

The answer to the first variant is: Yes, this is currently true. 
lower_packed_varyings will happily pack outputs from different vertex 
streams into the same vec4. This affects quite a lot of programs, e.g. 
you see it in piglit arb_gpu_shader5-xfb-streams.


The second question is: Do we want it to be true? I agree that it would 
be convenient to be able to use a single Stream member. Also, isolating 
the stream0 components from the rest would lead to slightly more 
efficient shaders for us in some cases.


I opted against it so far because I didn't want to think through the 
implications of changing lower_packed_varyings. The main question I have 
is: if you account for the size of the GS output in # of components, 
then it could happen that the number of output vec4s ends up being 
larger than (max # of output components) / 4. Will that be a problem 
somewhere?


Nicolai



Roland




---
 src/compiler/glsl/ir_print_visitor.cpp |  4 +--
 src/gallium/auxiliary/tgsi/tgsi_build.c| 18 +--
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 13 
 src/gallium/auxiliary/tgsi/tgsi_text.c | 48 ++
 src/gallium/include/pipe/p_shader_tokens.h |  5 +++-
 5 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/src/compiler/glsl/ir_print_visitor.cpp 
b/src/compiler/glsl/ir_print_visitor.cpp
index 2b77c14..d401426 100644
--- a/src/compiler/glsl/ir_print_visitor.cpp
+++ b/src/compiler/glsl/ir_print_visitor.cpp
@@ -173,26 +173,26 @@ void ir_print_visitor::visit(ir_variable *ir)
if (ir->data.location != -1)
   snprintf(loc, sizeof(loc), "location=%i ", ir->data.location);

char component[32] = {0};
if (ir->data.explicit_component)
   snprintf(component, sizeof(component), "component=%i ", 
ir->data.location_frac);

char stream[32] = {0};
if (ir->data.stream & (1u << 31)) {
   if (ir->data.stream & ~(1u << 31)) {
- snprintf(stream, sizeof(stream), "stream(%u,%u,%u,%u)",
+ snprintf(stream, sizeof(stream), "stream(%u,%u,%u,%u) ",
   ir->data.stream & 3, (ir->data.stream >> 2) & 3,
   (ir->data.stream >> 4) & 3, (ir->data.stream >> 6) & 3);
   }
} else if (ir->data.stream) {
-  snprintf(stream, sizeof(stream), "stream%u", ir->data.stream);
+  snprintf(stream, sizeof(stream), "stream%u ", ir->data.stream);
}

const char *const cent = (ir->data.centroid) ? "centroid " : "";
const char *const samp = (ir->data.sample) ? "sample " : "";
const char *const patc = (ir->data.patch) ? "patch " : "";
const char *const inv = (ir->data.invariant) ? "invariant " : "";
const char *const prec = (ir->data.precise) ? "precise " : "";
const char *const mode[] = { "", "uniform ", "shader_storage ",
 "shader_shared ", "shader_in ", "shader_out ",
 "in ", "out ", "inout ",
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c 
b/src/gallium/auxiliary/tgsi/tgsi_build.c
index d525c8f..773f892 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -232,40 +232,50 @@ tgsi_build_declaration_interp(unsigned interpolate,
return di;
 }

 static struct tgsi_declaration_semantic
 tgsi_default_declaration_semantic( void )
 {
struct tgsi_declaration_semantic ds;

ds.Name = TGSI_SEMANTIC_POSITION;
ds.Index = 0;
-   ds.Padding = 0;
+   ds.StreamX = 0;
+   ds.StreamY = 0;
+   ds.StreamZ = 0;
+   ds.StreamW = 0;

return ds;
 }

 static struct tgsi_declaration_semantic
 tgsi_build_declaration_semantic(
unsigned semantic_name,
unsigned semantic_index,
+   unsigned streamx,
+   unsigned streamy,
+   unsigned streamz,
+   unsigned streamw,
struct tgsi_declaration *declaration,
struct tgsi_header *header )
 {
struct tgsi_declaration_semantic ds;

assert( semantic_name <= TGSI_SEMANTIC_COUNT );
assert( semantic_index <= 0x );

ds.Name = semantic_name;
ds.Index = semantic_index;
-   ds.Padding = 0;
+   ds.StreamX = streamx;
+   ds.StreamY = streamy;
+   

Re: [Mesa-dev] [PATCH 02/23] tgsi: add Stream{X, Y, Z, W} fields to tgsi_declaration_semantic

2016-11-30 Thread Roland Scheidegger
Am 30.11.2016 um 14:35 schrieb Nicolai Hähnle:
> From: Nicolai Hähnle 
> 
> This is for geometry shader outputs. Without it, drivers have no way of
> knowing which stream each output is intended for, and have to
> conservatively write all outputs to all streams.
> 
> Separate stream numbers for each component are required due to output
> packing.
Are you sure this is true?
This is an area I don't know much about, but
https://www.opengl.org/wiki/Layout_Qualifier_(GLSL) tells me "Stream
assignments for a geometry shader are required to be the same for all
members of a block, but offsets are not."

Therefore I don't think output packing should ever happen across
multiple streams. I think it would be MUCH nicer if the semantic needed
just one stream member...

Roland



> ---
>  src/compiler/glsl/ir_print_visitor.cpp |  4 +--
>  src/gallium/auxiliary/tgsi/tgsi_build.c| 18 +--
>  src/gallium/auxiliary/tgsi/tgsi_dump.c | 13 
>  src/gallium/auxiliary/tgsi/tgsi_text.c | 48 
> ++
>  src/gallium/include/pipe/p_shader_tokens.h |  5 +++-
>  5 files changed, 83 insertions(+), 5 deletions(-)
> 
> diff --git a/src/compiler/glsl/ir_print_visitor.cpp 
> b/src/compiler/glsl/ir_print_visitor.cpp
> index 2b77c14..d401426 100644
> --- a/src/compiler/glsl/ir_print_visitor.cpp
> +++ b/src/compiler/glsl/ir_print_visitor.cpp
> @@ -173,26 +173,26 @@ void ir_print_visitor::visit(ir_variable *ir)
> if (ir->data.location != -1)
>snprintf(loc, sizeof(loc), "location=%i ", ir->data.location);
>  
> char component[32] = {0};
> if (ir->data.explicit_component)
>snprintf(component, sizeof(component), "component=%i ", 
> ir->data.location_frac);
>  
> char stream[32] = {0};
> if (ir->data.stream & (1u << 31)) {
>if (ir->data.stream & ~(1u << 31)) {
> - snprintf(stream, sizeof(stream), "stream(%u,%u,%u,%u)",
> + snprintf(stream, sizeof(stream), "stream(%u,%u,%u,%u) ",
>ir->data.stream & 3, (ir->data.stream >> 2) & 3,
>(ir->data.stream >> 4) & 3, (ir->data.stream >> 6) & 3);
>}
> } else if (ir->data.stream) {
> -  snprintf(stream, sizeof(stream), "stream%u", ir->data.stream);
> +  snprintf(stream, sizeof(stream), "stream%u ", ir->data.stream);
> }
>  
> const char *const cent = (ir->data.centroid) ? "centroid " : "";
> const char *const samp = (ir->data.sample) ? "sample " : "";
> const char *const patc = (ir->data.patch) ? "patch " : "";
> const char *const inv = (ir->data.invariant) ? "invariant " : "";
> const char *const prec = (ir->data.precise) ? "precise " : "";
> const char *const mode[] = { "", "uniform ", "shader_storage ",
>  "shader_shared ", "shader_in ", "shader_out 
> ",
>  "in ", "out ", "inout ",
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c 
> b/src/gallium/auxiliary/tgsi/tgsi_build.c
> index d525c8f..773f892 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_build.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
> @@ -232,40 +232,50 @@ tgsi_build_declaration_interp(unsigned interpolate,
> return di;
>  }
>  
>  static struct tgsi_declaration_semantic
>  tgsi_default_declaration_semantic( void )
>  {
> struct tgsi_declaration_semantic ds;
>  
> ds.Name = TGSI_SEMANTIC_POSITION;
> ds.Index = 0;
> -   ds.Padding = 0;
> +   ds.StreamX = 0;
> +   ds.StreamY = 0;
> +   ds.StreamZ = 0;
> +   ds.StreamW = 0;
>  
> return ds;
>  }
>  
>  static struct tgsi_declaration_semantic
>  tgsi_build_declaration_semantic(
> unsigned semantic_name,
> unsigned semantic_index,
> +   unsigned streamx,
> +   unsigned streamy,
> +   unsigned streamz,
> +   unsigned streamw,
> struct tgsi_declaration *declaration,
> struct tgsi_header *header )
>  {
> struct tgsi_declaration_semantic ds;
>  
> assert( semantic_name <= TGSI_SEMANTIC_COUNT );
> assert( semantic_index <= 0x );
>  
> ds.Name = semantic_name;
> ds.Index = semantic_index;
> -   ds.Padding = 0;
> +   ds.StreamX = streamx;
> +   ds.StreamY = streamy;
> +   ds.StreamZ = streamz;
> +   ds.StreamW = streamw;
>  
> declaration_grow( declaration, header );
>  
> return ds;
>  }
>  
>  static struct tgsi_declaration_image
>  tgsi_default_declaration_image(void)
>  {
> struct tgsi_declaration_image di;
> @@ -454,20 +464,24 @@ tgsi_build_full_declaration(
>struct tgsi_declaration_semantic *ds;
>  
>if( maxsize <= size )
>   return  0;
>ds = (struct tgsi_declaration_semantic *) [size];
>size++;
>  
>*ds = tgsi_build_declaration_semantic(
>   full_decl->Semantic.Name,
>   full_decl->Semantic.Index,
> + full_decl->Semantic.StreamX,
> + full_decl->Semantic.StreamY,
> + full_decl->Semantic.StreamZ,
> + full_decl->Semantic.StreamW,
> 

Re: [Mesa-dev] [PATCH] anv/cmd_buffer: Actually use the stencil dimension

2016-11-30 Thread Nanley Chery
On Tue, Nov 29, 2016 at 05:41:58PM -0800, Jason Ekstrand wrote:
> In an attempt to fix 3DSTATE_DEPTH_BUFFER for stencil-only cases, I
> accidentally kept setting the SurfaceType to 2D in the stencil-only case
> thanks to a copy+paste error.
> 
> Cc: Nanley Chery 
> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Thanks for the fix! This patch is,
Reviewed-by: Nanley Chery 

> 
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 73f4523..f761d9a 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -2153,7 +2153,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
> *cmd_buffer)
> */
>anv_batch_emit(_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
>   if (has_stencil) {
> -db.SurfaceType   = SURFTYPE_2D;
> +db.SurfaceType   =
> depth_stencil_surface_type(image->stencil_surface.isl.dim);
>   } else {
>  db.SurfaceType   = SURFTYPE_2D;
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Prefer in-tree headers to out-of-tree headers

2016-11-30 Thread Ville Syrjälä
On Tue, Nov 29, 2016 at 08:54:26AM -0800, Jason Ekstrand wrote:
> On Tue, Nov 29, 2016 at 8:47 AM, Ville Syrjälä <
> ville.syrj...@linux.intel.com> wrote:
> 
> > On Tue, Nov 29, 2016 at 08:28:55AM -0800, Jason Ekstrand wrote:
> > > On Tue, Nov 29, 2016 at 1:23 AM,  wrote:
> > >
> > > > From: Ville Syrjälä 
> > > >
> > > > Set the include paths to consider in-tree headers before out-of-tree
> > > > headers.
> > > >
> > > > Avoids the build failing due to stale headers being present in
> > > > $prefix. Previously 'make -ki install' or something similar was
> > required
> > > > to update the out-of-tree headers to allow the build to succeed.
> > > >
> > > > Also avoids having to rebuild the entire thing after every 'make
> > > > install'.
> > > >
> > > > Cc: Rob Clark 
> > > > Cc: Jason Ekstrand 
> > > > Signed-off-by: Ville Syrjälä 
> > > > ---
> > > >  src/intel/vulkan/Makefile.am | 16 +++-
> > > >  1 file changed, 11 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/src/intel/vulkan/Makefile.am
> > b/src/intel/vulkan/Makefile.am
> > > > index ce31abb2fce7..b80f8767296c 100644
> > > > --- a/src/intel/vulkan/Makefile.am
> > > > +++ b/src/intel/vulkan/Makefile.am
> > > > @@ -43,9 +43,6 @@ noinst_LTLIBRARIES = $(PER_GEN_LIBS)
> > > >  # The gallium includes are for the util/u_math.h include from
> > > > main/macros.h
> > > >
> > > >  AM_CPPFLAGS = \
> > > > -   $(INTEL_CFLAGS) \
> > > > -   $(VALGRIND_CFLAGS) \
> > > > -   $(DEFINES) \
> > > > -I$(top_srcdir)/include \
> > > > -I$(top_builddir)/src \
> > > > -I$(top_srcdir)/src \
> > > > @@ -61,6 +58,17 @@ AM_CPPFLAGS = \
> > > > -I$(top_builddir)/src/intel \
> > > > -I$(top_srcdir)/src/intel
> > > >
> > > > +if HAVE_PLATFORM_WAYLAND
> > > > +AM_CPPFLAGS += \
> > > > +   -I$(top_builddir)/src/egl/wayland/wayland-drm \
> > > > +   -I$(top_srcdir)/src/egl/wayland/wayland-drm
> > > > +endif
> > > >
> > >
> > > I think I have a mild preference for keeping wayland stuff together and
> > > moving the last AM_CPPFLAGS down but I don't care that much.  Either way,
> >
> > Would we still want all internal -I knobs to appear before any
> > external ones? To do that I'd have flip the x11 vs. wayland stuff
> > around. And if someone were to add internal -I knobs for x11 then
> > this scheme wouldn't work either way.
> >
> 
> Oh... I didn't think about that interaction.  Go with the way you had it
> for now.
> 
> 
> > Not sure if sticking to a strict global ordering like that is needed,
> > but at least it would seem a bit easier to maintain as you wouldn't
> > have to think too hard when adding new flags.
> >
> 
> We could have two things and then combine them later but that seems a bit
> painful.  Feel free to ignore my comments. :)

I did ;) Patch pushed. Thanks for the reviews.

-- 
Ville Syrjälä
Intel OTC
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: fix range computation for instanced client-side arrays

2016-11-30 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak  

> On Nov 29, 2016, at 8:18 PM, Ilia Mirkin  wrote:
> 
> We need to take the instance divisor and number of instances into
> account for instanced client-side arrays, rather than the vertex
> parameters.
> 
> Loosely based on the comparable nvc0 logic.
> 
> Signed-off-by: Ilia Mirkin 
> ---
> src/gallium/drivers/swr/swr_state.cpp | 74 +++
> src/gallium/drivers/swr/swr_state.h   |  2 +
> 2 files changed, 52 insertions(+), 24 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/swr_state.cpp 
> b/src/gallium/drivers/swr/swr_state.cpp
> index f1cf4ae..9f6b5b0 100644
> --- a/src/gallium/drivers/swr/swr_state.cpp
> +++ b/src/gallium/drivers/swr/swr_state.cpp
> @@ -494,6 +494,15 @@ swr_create_vertex_elements_state(struct pipe_context 
> *pipe,
>  const SWR_FORMAT_INFO _desc = GetFormatInfo(
> mesa_to_swr_format(attribs[i].src_format));
>  velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
> +
> + if (attribs[i].instance_divisor != 0) {
> +velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index;
> +uint32_t *min_instance_div =
> +   >min_instance_div[attribs[i].vertex_buffer_index];
> +if (!*min_instance_div ||
> +attribs[i].instance_divisor < *min_instance_div)
> +   *min_instance_div = attribs[i].instance_divisor;
> + }
>   }
>}
> 
> @@ -870,6 +879,34 @@ swr_change_rt(struct swr_context *ctx,
>return need_fence;
> }
> 
> +static inline void
> +swr_user_vbuf_range(const struct pipe_draw_info *info,
> +const struct swr_vertex_element_state *velems,
> +const struct pipe_vertex_buffer *vb,
> +uint32_t i,
> +uint32_t *totelems,
> +uint32_t *base,
> +uint32_t *size)
> +{
> +   /* FIXME: The size is too large - we don't access the full extra stride. 
> */
> +   unsigned elems;
> +   if (velems->instanced_bufs & (1U << i)) {
> +  elems = info->instance_count / velems->min_instance_div[i] + 1;
> +  *totelems = info->start_instance + elems;
> +  *base = info->start_instance * vb->stride;
> +  *size = elems * vb->stride;
> +   } else if (vb->stride) {
> +  elems = info->max_index - info->min_index + 1;
> +  *totelems = info->max_index + 1;
> +  *base = info->min_index * vb->stride;
> +  *size = elems * vb->stride;
> +   } else {
> +  *totelems = 1;
> +  *base = 0;
> +  *size = velems->stream_pitch[i];
> +   }
> +}
> +
> void
> swr_update_derived(struct pipe_context *pipe,
>const struct pipe_draw_info *p_draw_info)
> @@ -1039,8 +1076,7 @@ swr_update_derived(struct pipe_context *pipe,
>/* Set vertex & index buffers */
>/* (using draw info if called by swr_draw_vbo) */
>if (ctx->dirty & SWR_NEW_VERTEX) {
> -  uint32_t size, pitch, max_vertex, partial_inbounds, scratch_total;
> -  const uint8_t *p_data;
> +  uint32_t scratch_total;
>   uint8_t *scratch = NULL;
> 
>   /* If being called by swr_draw_vbo, copy draw details */
> @@ -1056,14 +1092,8 @@ swr_update_derived(struct pipe_context *pipe,
>  if (!vb->user_buffer)
> continue;
> 
> - if (vb->stride) {
> -size = (info.max_index - info.min_index + 1) * vb->stride;
> - } else {
> -/* pitch = 0, means constant value
> - * set size to 1 vertex */
> -size = ctx->velems->stream_pitch[i];
> - }
> -
> + uint32_t elems, base, size;
> + swr_user_vbuf_range(, ctx->velems, vb, i, , , 
> );
>  scratch_total += AlignUp(size, 4);
>   }
> 
> @@ -1075,6 +1105,8 @@ swr_update_derived(struct pipe_context *pipe,
>   /* vertex buffers */
>   SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
>   for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
> + uint32_t size, pitch, elems, partial_inbounds;
> + const uint8_t *p_data;
>  struct pipe_vertex_buffer *vb = >vertex_buffer[i];
> 
>  pitch = vb->stride;
> @@ -1083,7 +1115,7 @@ swr_update_derived(struct pipe_context *pipe,
>  * size is based on buffer->width0 rather than info.max_index
>  * to prevent having to validate VBO on each draw */
> size = vb->buffer->width0;
> -max_vertex = size / pitch;
> +elems = size / pitch;
> partial_inbounds = size % pitch;
> 
> p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
> @@ -1093,25 +1125,17 @@ swr_update_derived(struct pipe_context *pipe,
>  * revalidate on each draw */
> post_update_dirty_flags |= SWR_NEW_VERTEX;
> 
> -if (pitch) {
> -   size = (info.max_index - info.min_index + 1) * pitch;
> -   

[Mesa-dev] [Bug 98911] [Ironlake Mobile] Severe Artifacting in Minecraft and weird "screen leftovers" when recording desktop with latest Mesa

2016-11-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98911

Mark Janes  changed:

   What|Removed |Added

 CC||mark.a.ja...@intel.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Prefer in-tree headers to out-of-tree headers

2016-11-30 Thread Chad Versace
On Tue 29 Nov 2016, ville.syrj...@linux.intel.com wrote:
> From: Ville Syrjälä 
> 
> Set the include paths to consider in-tree headers before out-of-tree
> headers.
> 
> Avoids the build failing due to stale headers being present in
> $prefix. Previously 'make -ki install' or something similar was required
> to update the out-of-tree headers to allow the build to succeed.
> 
> Also avoids having to rebuild the entire thing after every 'make
> install'.
> 
> Cc: Rob Clark 
> Cc: Jason Ekstrand 
> Signed-off-by: Ville Syrjälä 

Thanks for fixing this. The bug bit me too.
Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/23] radeonsi: do not kill GS with memory writes

2016-11-30 Thread Nicolai Hähnle

On 30.11.2016 14:35, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

Vertex emits beyond the specified maximum number of vertices are supposed to
have no effect, which is why we used to always kill GS that reached the limit.

However, if the GS also writes to memory (SSBO, atomics, shader images), then
we must keep going and only skip the vertex emit itself.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_shader.c | 30 ++
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 1e3be62..aac3091 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5116,50 +5116,60 @@ static unsigned si_llvm_get_stream(struct 
lp_build_tgsi_context *bld_base,
 static void si_llvm_emit_vertex(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *uint = _base->uint_bld;
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = >selector->info;
struct gallivm_state *gallivm = bld_base->base.gallivm;
+   struct lp_build_if_state if_state;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
SI_PARAM_GS2VS_OFFSET);
LLVMValueRef gs_next_vertex;
LLVMValueRef can_emit, kill;
LLVMValueRef args[2];
unsigned chan;
int i;
unsigned stream;

stream = si_llvm_get_stream(bld_base, emit_data);

/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(gallivm->builder,
   ctx->gs_next_vertex[stream],
   "");

/* If this thread has already emitted the declared maximum number of
-* vertices, kill it: excessive vertex emissions are not supposed to
-* have any effect, and GS threads have no externally observable
-* effects other than emitting vertices.
+* vertices, skip the write: excessive vertex emissions are not
+* supposed to have any effect.
+*
+* If the shader has no writes to memory, kill it instead. This skips
+* further memory loads and may allow LLVM to skip to the end
+* altogether.
 */
can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex,
 lp_build_const_int32(gallivm,
  
shader->selector->gs_max_out_vertices), "");
-   kill = lp_build_select(_base->base, can_emit,
-  lp_build_const_float(gallivm, 1.0f),
-  lp_build_const_float(gallivm, -1.0f));

-   lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-  ctx->voidt, , 1, 0);
+   bool use_kill = !info->writes_memory;


I think there's a && max_streams == 0 missing here.

I'm pretty sure the intention is that the max_vertices is per-stream, 
but reading through the GLSL docs again, it kind of sounds like at least 
the spec language mandates it to be per-invocation (i.e. sum across 
streams).


In any case, what we do here is inconsistent with either interpretation, 
but I'm also not aware of any test case (yet?) which would vote one way 
or the other.


Any ideas?

Thanks,
Nicolai


+   if (use_kill) {
+   kill = lp_build_select(_base->base, can_emit,
+  lp_build_const_float(gallivm, 1.0f),
+  lp_build_const_float(gallivm, -1.0f));
+
+   lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+  ctx->voidt, , 1, 0);
+   } else {
+   lp_build_if(_state, gallivm, can_emit);
+   }

for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr =
ctx->soa.outputs[i];

for (chan = 0; chan < 4; chan++) {
LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, 
out_ptr[chan], "");
LLVMValueRef voffset =
lp_build_const_int32(gallivm, (i * 4 + chan) *
 
shader->selector->gs_max_out_vertices);
@@ -5171,30 +5181,34 @@ static void si_llvm_emit_vertex(

build_tbuffer_store(ctx,
ctx->gsvs_ring[stream],
out_val, 1,
voffset, soffset, 0,
V_008F0C_BUF_DATA_FORMAT_32,
   

Re: [Mesa-dev] [PATCH 1/2] mesa: Add GL and GLSL plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Manolova, Plamena
On Wed, Nov 30, 2016 at 4:36 PM, Ilia Mirkin  wrote:

> On Wed, Nov 30, 2016 at 9:19 AM, Lionel Landwerlin
>  wrote:
> > Also forgot that (like Ilia suggested for NV_image_formats) you can
> update
> > docs/relnotes/ to list the new feature.
>
> That would be appropriate for the patch that actually exposes the
> feature. This one just adds the core plumbing, which is generally not
> announced in release notes.
>
>   -ilia
>

I'll update the subsequent patch to include the change to the release notes.

Pam
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: Add GL and GLSL plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Manolova, Plamena
Thank you for reviewing guys! I'll go ahead and make that change.

Pam

On Wed, Nov 30, 2016 at 3:33 PM, Ilia Mirkin  wrote:

> On Nov 30, 2016 7:17 AM, "Lionel Landwerlin" <
> lionel.g.landwer...@intel.com> wrote:
> >
> > On 22/11/16 21:53, Plamena Manolova wrote:
> >>
> >> This extension allows the fragment shader to control whether values in
> >> gl_SampleMaskIn[] reflect the coverage after application of the early
> >> depth and stencil tests.
> >>
> >> Signed-off-by: Plamena Manolova 
> >> ---
> >>   src/compiler/glsl/ast.h  |  5 +
> >>   src/compiler/glsl/ast_to_hir.cpp |  5 +
> >>   src/compiler/glsl/ast_type.cpp   |  8 +++-
> >>   src/compiler/glsl/glsl_parser.yy | 11 +++
> >>   src/compiler/glsl/glsl_parser_extras.cpp |  4 
> >>   src/compiler/glsl/glsl_parser_extras.h   |  4 
> >>   src/compiler/glsl/linker.cpp |  4 
> >>   src/compiler/shader_info.h   |  1 +
> >>   src/mesa/main/extensions_table.h |  1 +
> >>   src/mesa/main/mtypes.h   |  2 ++
> >>   src/mesa/main/shaderapi.c|  1 +
> >>   11 files changed, 45 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
> >> index 55f9a6c..ad19493 100644
> >> --- a/src/compiler/glsl/ast.h
> >> +++ b/src/compiler/glsl/ast.h
> >> @@ -606,6 +606,11 @@ struct ast_type_qualifier {
> >>/** \{ */
> >>unsigned blend_support:1; /**< Are there any blend_support_
> qualifiers */
> >>/** \} */
> >> +
> >> + /**
> >> +  * Flag set if GL_ARB_post_depth_coverage layout qualifier is
> used.
> >> +  */
> >> + unsigned post_depth_coverage:1;
> >> }
> >> /** \brief Set of flags, accessed by name. */
> >> q;
> >> diff --git a/src/compiler/glsl/ast_to_hir.cpp
> b/src/compiler/glsl/ast_to_hir.cpp
> >> index 9b8678c..c31da86 100644
> >> --- a/src/compiler/glsl/ast_to_hir.cpp
> >> +++ b/src/compiler/glsl/ast_to_hir.cpp
> >> @@ -3632,6 +3632,11 @@ apply_layout_qualifier_to_variable(const struct
> ast_type_qualifier *qual,
> >> _mesa_glsl_error(loc, state, "early_fragment_tests layout
> qualifier only "
> >>  "valid in fragment shader input layout
> declaration.");
> >>  }
> >> +
> >> +   if (qual->flags.q.post_depth_coverage) {
> >> +  _mesa_glsl_error(loc, state, "post_depth_coverage layout
> qualifier only "
> >> +   "valid in fragment shader input layout
> declaration.");
> >> +   }
> >>   }
> >> static void
> >> diff --git a/src/compiler/glsl/ast_type.cpp
> b/src/compiler/glsl/ast_type.cpp
> >> index 2856f18..1905721 100644
> >> --- a/src/compiler/glsl/ast_type.cpp
> >> +++ b/src/compiler/glsl/ast_type.cpp
> >> @@ -489,6 +489,7 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE
> *loc,
> >> break;
> >>  case MESA_SHADER_FRAGMENT:
> >> valid_in_mask.flags.q.early_fragment_tests = 1;
> >> +  valid_in_mask.flags.q.post_depth_coverage = 1;
> >> break;
> >>  case MESA_SHADER_COMPUTE:
> >> create_cs_ast |=
> >> @@ -540,6 +541,10 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE
> *loc,
> >> state->fs_early_fragment_tests = true;
> >>  }
> >>   +   if (q.flags.q.post_depth_coverage) {
> >> +  state->fs_post_depth_coverage = true;
> >> +   }
> >> +
> >>  if (this->flags.q.vertex_spacing) {
> >> if (q.flags.q.vertex_spacing &&
> >> this->vertex_spacing != q.vertex_spacing) {
> >> @@ -671,7 +676,8 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
> >>   bad.flags.q.point_mode ? " point_mode" : "",
> >>   bad.flags.q.vertices ? " vertices" : "",
> >>   bad.flags.q.subroutine ? " subroutine" : "",
> >> -bad.flags.q.subroutine_def ? " subroutine_def" :
> "");
> >> +bad.flags.q.subroutine_def ? " subroutine_def" :
> "",
> >> +bad.flags.q.post_depth_coverage ? "
> post_depth_coverage" : "");
> >>  return false;
> >>   }
> >>   diff --git a/src/compiler/glsl/glsl_parser.yy
> b/src/compiler/glsl/glsl_parser.yy
> >> index a48dc68..a53f476 100644
> >> --- a/src/compiler/glsl/glsl_parser.yy
> >> +++ b/src/compiler/glsl/glsl_parser.yy
> >> @@ -1373,6 +1373,17 @@ layout_qualifier_id:
> >> $$.flags.q.early_fragment_tests = 1;
> >>}
> >
> >
> > I wonder if the following check should include state->
> ARB_post_depth_coverage_enable as condition before matching the layout
> qualifier.
> > Maybe Ilia can confirm?
>
> Sounds right. A shader without the enable but with the layout qualifier
> should fail to compile. There should be a test in piglit to that effect
> with glslparsertest.
>
> >
> > Thanks!
> >
> >
> >> +
> >> + if (!$$.flags.i &&
> >> + 

Re: [Mesa-dev] [PATCH 7/8] radeonsi: apply the double EVENT_WRITE_EOP workaround to VI as well

2016-11-30 Thread Nicolai Hähnle

On 30.11.2016 15:01, Nicolai Hähnle wrote:

On 30.11.2016 02:36, Marek Olšák wrote:

From: Marek Olšák 

Internal docs don't mention it, but they also don't mention that the bug
has been fixed (like other CI bugs fixed in VI).

Vulkan does this too.

Cc: 13.0 
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 15e8a12..bb4cd86 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -95,21 +95,22 @@ void r600_gfx_write_event_eop(struct
r600_common_context *ctx,
   unsigned event, unsigned event_flags,
   unsigned data_sel,
   struct r600_resource *buf, uint64_t va,
   uint32_t old_fence, uint32_t new_fence)
 {
 struct radeon_winsys_cs *cs = ctx->gfx.cs;
 unsigned op = EVENT_TYPE(event) |
   EVENT_INDEX(5) |
   event_flags;

-if (ctx->chip_class == CIK) {
+if (ctx->chip_class == CIK ||
+ctx->chip_class == VI) {


This needs a corresponding change in r600_gfx_write_fence_dwords.

Would be good to get a clarification from the HW team on this. Or maybe
dig up the corresponding bug in the internal bug tracker? I vaguely
recall seeing something in there, but maybe I'm getting it confused with
another bug.


Never mind, from the dates it's pretty clear that this must affect VI as 
well.


The other function still needs to be adjusted though. With that change 
you can add my R-b to this patch as well.


Nicolai


Nicolai


 /* Two EOP events are required to make all engines go idle
  * (and optional cache flushes executed) before the timestamp
  * is written.
  */
 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 radeon_emit(cs, op);
 radeon_emit(cs, va);
 radeon_emit(cs, ((va >> 32) & 0x) | EOP_DATA_SEL(data_sel));
 radeon_emit(cs, old_fence); /* immediate data */
 radeon_emit(cs, 0); /* unused */


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: Add GL and GLSL plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Ilia Mirkin
On Wed, Nov 30, 2016 at 9:19 AM, Lionel Landwerlin
 wrote:
> Also forgot that (like Ilia suggested for NV_image_formats) you can update
> docs/relnotes/ to list the new feature.

That would be appropriate for the patch that actually exposes the
feature. This one just adds the core plumbing, which is generally not
announced in release notes.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: Add GL and GLSL plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-11-30 Thread Lionel Landwerlin
Also forgot that (like Ilia suggested for NV_image_formats) you can 
update docs/relnotes/ to list the new feature.


Cheers,

-
Lionel

On 30/11/16 13:33, Ilia Mirkin wrote:


On Nov 30, 2016 7:17 AM, "Lionel Landwerlin" 
> 
wrote:

>
> On 22/11/16 21:53, Plamena Manolova wrote:
>>
>> This extension allows the fragment shader to control whether values in
>> gl_SampleMaskIn[] reflect the coverage after application of the early
>> depth and stencil tests.
>>
>> Signed-off-by: Plamena Manolova >

>> ---
>>   src/compiler/glsl/ast.h  |  5 +
>>   src/compiler/glsl/ast_to_hir.cpp |  5 +
>>   src/compiler/glsl/ast_type.cpp   |  8 +++-
>>   src/compiler/glsl/glsl_parser.yy | 11 +++
>>   src/compiler/glsl/glsl_parser_extras.cpp |  4 
>>   src/compiler/glsl/glsl_parser_extras.h   |  4 
>>   src/compiler/glsl/linker.cpp |  4 
>>   src/compiler/shader_info.h   |  1 +
>>   src/mesa/main/extensions_table.h |  1 +
>>   src/mesa/main/mtypes.h   |  2 ++
>>   src/mesa/main/shaderapi.c|  1 +
>>   11 files changed, 45 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
>> index 55f9a6c..ad19493 100644
>> --- a/src/compiler/glsl/ast.h
>> +++ b/src/compiler/glsl/ast.h
>> @@ -606,6 +606,11 @@ struct ast_type_qualifier {
>>/** \{ */
>>unsigned blend_support:1; /**< Are there any 
blend_support_ qualifiers */

>>/** \} */
>> +
>> + /**
>> +  * Flag set if GL_ARB_post_depth_coverage layout 
qualifier is used.

>> +  */
>> + unsigned post_depth_coverage:1;
>> }
>> /** \brief Set of flags, accessed by name. */
>> q;
>> diff --git a/src/compiler/glsl/ast_to_hir.cpp 
b/src/compiler/glsl/ast_to_hir.cpp

>> index 9b8678c..c31da86 100644
>> --- a/src/compiler/glsl/ast_to_hir.cpp
>> +++ b/src/compiler/glsl/ast_to_hir.cpp
>> @@ -3632,6 +3632,11 @@ apply_layout_qualifier_to_variable(const 
struct ast_type_qualifier *qual,
>> _mesa_glsl_error(loc, state, "early_fragment_tests layout 
qualifier only "
>>  "valid in fragment shader input layout 
declaration.");

>>  }
>> +
>> +   if (qual->flags.q.post_depth_coverage) {
>> +  _mesa_glsl_error(loc, state, "post_depth_coverage layout 
qualifier only "
>> +   "valid in fragment shader input layout 
declaration.");

>> +   }
>>   }
>> static void
>> diff --git a/src/compiler/glsl/ast_type.cpp 
b/src/compiler/glsl/ast_type.cpp

>> index 2856f18..1905721 100644
>> --- a/src/compiler/glsl/ast_type.cpp
>> +++ b/src/compiler/glsl/ast_type.cpp
>> @@ -489,6 +489,7 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE 
*loc,

>> break;
>>  case MESA_SHADER_FRAGMENT:
>> valid_in_mask.flags.q.early_fragment_tests = 1;
>> +  valid_in_mask.flags.q.post_depth_coverage = 1;
>> break;
>>  case MESA_SHADER_COMPUTE:
>> create_cs_ast |=
>> @@ -540,6 +541,10 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE 
*loc,

>> state->fs_early_fragment_tests = true;
>>  }
>>   +   if (q.flags.q.post_depth_coverage) {
>> +  state->fs_post_depth_coverage = true;
>> +   }
>> +
>>  if (this->flags.q.vertex_spacing) {
>> if (q.flags.q.vertex_spacing &&
>> this->vertex_spacing != q.vertex_spacing) {
>> @@ -671,7 +676,8 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
>>   bad.flags.q.point_mode ? " point_mode" : "",
>>   bad.flags.q.vertices ? " vertices" : "",
>>   bad.flags.q.subroutine ? " subroutine" : "",
>> -bad.flags.q.subroutine_def ? " subroutine_def" 
: "");
>> +bad.flags.q.subroutine_def ? " subroutine_def" 
: "",
>> +bad.flags.q.post_depth_coverage ? " 
post_depth_coverage" : "");

>>  return false;
>>   }
>>   diff --git a/src/compiler/glsl/glsl_parser.yy 
b/src/compiler/glsl/glsl_parser.yy

>> index a48dc68..a53f476 100644
>> --- a/src/compiler/glsl/glsl_parser.yy
>> +++ b/src/compiler/glsl/glsl_parser.yy
>> @@ -1373,6 +1373,17 @@ layout_qualifier_id:
>> $$.flags.q.early_fragment_tests = 1;
>>}
>
>
> I wonder if the following check should include state-> 
ARB_post_depth_coverage_enable as condition before matching the layout 
qualifier.

> Maybe Ilia can confirm?

Sounds right. A shader without the enable but with the layout qualifier 
should fail to compile. There should be a test in piglit to that 
effect with glslparsertest.


>
> Thanks!
>
>
>> +
>> + if (!$$.flags.i &&
>> + match_layout_qualifier($1, "post_depth_coverage", 
state) == 0) {

>> +if (state->stage != MESA_SHADER_FRAGMENT) {
>> 

Re: [Mesa-dev] [AppVeyor] mesa master #2842 failed

2016-11-30 Thread Jose Fonseca
I didn't spot any relevant discussion on 
http://help.appveyor.com/discussions/problems or 
https://appveyor.statuspage.io/


It must have been something transient.

Jose

On 30/11/16 03:52, Brian Paul wrote:

We've had a few instances in the past where complex macro code caused
super long MSVC compile times.  But, AFAIK, nothing's changed recently
in the u_unfilled_gen.c code.

-Brian

On 11/29/2016 08:48 PM, Roland Scheidegger wrote:

It's the second time it reached a timeout today - albeit the first time
it didn't even start compiling...
No idea what's up, but it looks like things are really slow today, one
hour build time should be quite sufficient I think.

Roland


Am 30.11.2016 um 04:05 schrieb Ilia Mirkin:

"Build execution time has reached the maximum allowed time for your
plan (60 minutes)."

The last line in the log is "Generating
build\windows-x86-debug\gallium\auxiliary\indices\u_unfilled_gen.c".
Either way, I don't think it's my bad. Let me know if I've
misanalyzed.

   -ilia

On Tue, Nov 29, 2016 at 9:58 PM, AppVeyor  wrote:

Build mesa 2842 failed

Commit ddf0f097e7 by Ilia Mirkin on 11/24/2016 11:02 PM:
swr: [rasterizer jit] use signed integer representation for logic
op\n\nInstead of (incorrectly) biasing the snorm value to make it
look like
a\nunorm, just use signed integer math.\n\nThis fixes
arb_color_buffer_float-render GL_RGBA8_SNORM\n\nSigned-off-by: Ilia
Mirkin
\nReviewed-by: Tim Rowley


Configure your notification preferences




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev=DgIGaQ=uilaK90D4TOVoH58JNXRgQ=Ie7_encNUsqxbSRbqbNgofw0ITcfE8JKfaUjIQhncGA=B35xEz-XuSXGB0bDYYMPpiLbStv8RMIv1cozFqCDArg=AJ4fCLUEx4BBpdv4x55Zq7ciKpjJzRIgHycmOXXnqHI=






___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 98911] [Ironlake Mobile] Severe Artifacting in Minecraft and weird "screen leftovers" when recording desktop with latest Mesa

2016-11-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98911

--- Comment #1 from paed808  ---
Here is a video I recorded of the issue.

https://www.youtube.com/watch?v=Iatb2PQPSFI

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 8/8] radeonsi: document a CP DMA bug that doesn't need a workaround yet

2016-11-30 Thread Nicolai Hähnle

Apart from the comment on patch 7, the series is

Reviewed-by: Nicolai Hähnle 

On 30.11.2016 02:36, Marek Olšák wrote:

From: Marek Olšák 

This one is easy to miss, because it's not documented in any internal doc.
---
 src/amd/common/sid.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 7f598ca..3b3983f 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -144,21 +144,25 @@
 #define COPY_DATA_IMM   5
 #defineCOPY_DATA_DST_SEL(x)(((unsigned)(x) & 0xf) 
<< 8)
 #defineCOPY_DATA_COUNT_SEL (1 << 16)
 #defineCOPY_DATA_WR_CONFIRM(1 << 20)
 #define PKT3_PFP_SYNC_ME  0x42
 #define PKT3_SURFACE_SYNC  0x43 /* deprecated on CIK, use 
ACQUIRE_MEM */
 #define PKT3_ME_INITIALIZE 0x44 /* not on CIK */
 #define PKT3_COND_WRITE0x45
 #define PKT3_EVENT_WRITE   0x46
 #define PKT3_EVENT_WRITE_EOP   0x47
-#define PKT3_EVENT_WRITE_EOS   0x48
+/* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
+ * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
+ * DST_SEL=MC. Only CIK chips are affected.
+ */
+/*#define PKT3_EVENT_WRITE_EOS   0x48*/ /* fix CP DMA before 
uncommenting */
 #define PKT3_ONE_REG_WRITE 0x57 /* not on CIK */
 #define PKT3_ACQUIRE_MEM   0x58 /* new for CIK */
 #define PKT3_SET_CONFIG_REG0x68
 #define PKT3_SET_CONTEXT_REG   0x69
 #define PKT3_SET_SH_REG0x76
 #define PKT3_SET_SH_REG_OFFSET 0x77
 #define PKT3_SET_UCONFIG_REG   0x79 /* new for CIK */
 #define PKT3_LOAD_CONST_RAM0x80
 #define PKT3_WRITE_CONST_RAM   0x81
 #define PKT3_DUMP_CONST_RAM0x83


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/8] radeonsi: apply the double EVENT_WRITE_EOP workaround to VI as well

2016-11-30 Thread Nicolai Hähnle

On 30.11.2016 02:36, Marek Olšák wrote:

From: Marek Olšák 

Internal docs don't mention it, but they also don't mention that the bug
has been fixed (like other CI bugs fixed in VI).

Vulkan does this too.

Cc: 13.0 
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 15e8a12..bb4cd86 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -95,21 +95,22 @@ void r600_gfx_write_event_eop(struct r600_common_context 
*ctx,
  unsigned event, unsigned event_flags,
  unsigned data_sel,
  struct r600_resource *buf, uint64_t va,
  uint32_t old_fence, uint32_t new_fence)
 {
struct radeon_winsys_cs *cs = ctx->gfx.cs;
unsigned op = EVENT_TYPE(event) |
  EVENT_INDEX(5) |
  event_flags;

-   if (ctx->chip_class == CIK) {
+   if (ctx->chip_class == CIK ||
+   ctx->chip_class == VI) {


This needs a corresponding change in r600_gfx_write_fence_dwords.

Would be good to get a clarification from the HW team on this. Or maybe 
dig up the corresponding bug in the internal bug tracker? I vaguely 
recall seeing something in there, but maybe I'm getting it confused with 
another bug.


Nicolai


/* Two EOP events are required to make all engines go idle
 * (and optional cache flushes executed) before the timestamp
 * is written.
 */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, op);
radeon_emit(cs, va);
radeon_emit(cs, ((va >> 32) & 0x) | EOP_DATA_SEL(data_sel));
radeon_emit(cs, old_fence); /* immediate data */
radeon_emit(cs, 0); /* unused */


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/3] gallium driver for Vivante GPUs

2016-11-30 Thread Christian Gmeiner
Hi all,

this time I sent the patchset to the correct ml.
Had no coffee when sending it out for the first time :)

greets
--
Christian Gmeiner, MSc

https://soundcloud.com/christian-gmeiner
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] imx: gallium driver for imx-drm scanout driver

2016-11-30 Thread Christian Gmeiner
The imx (stub) driver is needed to get hardware acceleration from
etnaviv on a platform using imx-drm kms driver. This adds support
for wayland and native kms egl apps.

Signed-off-by: Christian Gmeiner 
---
 configure.ac   | 12 +++
 src/gallium/Makefile.am|  4 +++
 .../auxiliary/pipe-loader/pipe_loader_drm.c|  5 +++
 src/gallium/auxiliary/target-helpers/drm_helper.h  | 23 
 .../auxiliary/target-helpers/drm_helper_public.h   |  3 ++
 src/gallium/drivers/imx/Automake.inc   |  9 +
 src/gallium/drivers/imx/Makefile.am|  9 +
 src/gallium/winsys/imx/drm/Makefile.am | 33 +
 src/gallium/winsys/imx/drm/Makefile.sources|  3 ++
 src/gallium/winsys/imx/drm/imx_drm_public.h| 31 
 src/gallium/winsys/imx/drm/imx_drm_winsys.c| 41 ++
 11 files changed, 173 insertions(+)
 create mode 100644 src/gallium/drivers/imx/Automake.inc
 create mode 100644 src/gallium/drivers/imx/Makefile.am
 create mode 100644 src/gallium/winsys/imx/drm/Makefile.am
 create mode 100644 src/gallium/winsys/imx/drm/Makefile.sources
 create mode 100644 src/gallium/winsys/imx/drm/imx_drm_public.h
 create mode 100644 src/gallium/winsys/imx/drm/imx_drm_winsys.c

diff --git a/configure.ac b/configure.ac
index 83b23af..c0f81ce 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2447,6 +2447,9 @@ if test -n "$with_gallium_drivers"; then
 PKG_CHECK_MODULES([ETNAVIV], [libdrm_etnaviv >= 
$LIBDRM_ETNAVIV_REQUIRED])
 require_libdrm "etnaviv"
 ;;
+   ximx)
+HAVE_GALLIUM_IMX=yes
+;;
 xswrast)
 HAVE_GALLIUM_SOFTPIPE=yes
 if test "x$MESA_LLVM" = x1; then
@@ -2499,6 +2502,12 @@ if test "x$HAVE_RADEON_VULKAN" = "xyes"; then
 radeon_llvm_check "radv" "3" "9" "0"
 fi
 
+dnl We need to validate some needed dependencies for renderonly drivers.
+
+if test "x$HAVE_GALLIUM_ETNAVIV" != xyes -a "x$HAVE_GALLIUM_IMX" == xyes  ; 
then
+AC_ERROR([Building with imx requires etnaviv])
+fi
+
 dnl Set LLVM_LIBS - This is done after the driver configuration so
 dnl that drivers can add additional components to LLVM_COMPONENTS.
 dnl Previously, gallium drivers were updating LLVM_LIBS directly
@@ -2572,6 +2581,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_SWRAST, test 
"x$HAVE_GALLIUM_SOFTPIPE" = xyes -o \
 AM_CONDITIONAL(HAVE_GALLIUM_VC4, test "x$HAVE_GALLIUM_VC4" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_ETNAVIV, test "x$HAVE_GALLIUM_ETNAVIV" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_IMX, test "x$HAVE_GALLIUM_IMX" = xyes)
 
 AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test 
"x$enable_shared_pipe_drivers" = xno)
 
@@ -2719,6 +2729,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/drivers/swr/Makefile
src/gallium/drivers/trace/Makefile
src/gallium/drivers/etnaviv/Makefile
+   src/gallium/drivers/imx/Makefile
src/gallium/drivers/vc4/Makefile
src/gallium/drivers/virgl/Makefile
src/gallium/state_trackers/clover/Makefile
@@ -2749,6 +2760,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/tests/trivial/Makefile
src/gallium/tests/unit/Makefile
src/gallium/winsys/etnaviv/drm/Makefile
+   src/gallium/winsys/imx/drm/Makefile
src/gallium/winsys/freedreno/drm/Makefile
src/gallium/winsys/i915/drm/Makefile
src/gallium/winsys/intel/drm/Makefile
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index 9e47e9f..f910f31 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -72,6 +72,10 @@ if HAVE_GALLIUM_ETNAVIV
 SUBDIRS += drivers/etnaviv winsys/etnaviv/drm
 endif
 
+if HAVE_GALLIUM_IMX
+SUBDIRS += drivers/imx winsys/imx/drm
+endif
+
 ## swrast/softpipe
 if HAVE_GALLIUM_SOFTPIPE
 SUBDIRS += drivers/softpipe
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 
b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 99d9da6..6c89fe5 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -158,6 +158,11 @@ static const struct drm_driver_descriptor 
driver_descriptors[] = {
 .driver_name = "etnaviv",
 .create_screen = pipe_etna_create_screen,
 .configuration = configuration_query,
+},
+{
+.driver_name = "imx-drm",
+.create_screen = pipe_imx_drm_create_screen,
+.configuration = configuration_query,
 }
 };
 #endif
diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h 
b/src/gallium/auxiliary/target-helpers/drm_helper.h
index e056c58..fbb4ce4 100644
--- a/src/gallium/auxiliary/target-helpers/drm_helper.h
+++ 

[Mesa-dev] [PATCH 1/3] gallium: add renderonly library

2016-11-30 Thread Christian Gmeiner
This is a very lightweight library to add basic support for
renderonly GPUs. It does all the magic regarding in/exporting
buffers etc. This library will likely break android support and
hopefully will get replaced with a better solution based on gbm2.

Signed-off-by: Christian Gmeiner 
---
 src/gallium/Automake.inc  |   5 +
 src/gallium/auxiliary/Makefile.am |  10 ++
 src/gallium/auxiliary/Makefile.sources|   4 +
 src/gallium/auxiliary/renderonly/renderonly.c | 199 ++
 src/gallium/auxiliary/renderonly/renderonly.h |  81 +++
 5 files changed, 299 insertions(+)
 create mode 100644 src/gallium/auxiliary/renderonly/renderonly.c
 create mode 100644 src/gallium/auxiliary/renderonly/renderonly.h

diff --git a/src/gallium/Automake.inc b/src/gallium/Automake.inc
index 6fe2e22..6aadcb9 100644
--- a/src/gallium/Automake.inc
+++ b/src/gallium/Automake.inc
@@ -50,6 +50,11 @@ GALLIUM_COMMON_LIB_DEPS = \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS)
 
+if HAVE_LIBDRM
+GALLIUM_COMMON_LIB_DEPS += \
+   $(LIBDRM_LIBS)
+endif
+
 GALLIUM_WINSYS_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
diff --git a/src/gallium/auxiliary/Makefile.am 
b/src/gallium/auxiliary/Makefile.am
index 4a4a4fb..6b63cf1 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -20,6 +20,16 @@ libgallium_la_SOURCES = \
$(NIR_SOURCES) \
$(GENERATED_SOURCES)
 
+if HAVE_LIBDRM
+
+AM_CFLAGS += \
+   $(LIBDRM_CFLAGS)
+
+libgallium_la_SOURCES += \
+   $(RENDERONLY_SOURCES)
+
+endif
+
 if HAVE_MESA_LLVM
 
 AM_CFLAGS += \
diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index 5d4fe30..8d3e4a9 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -435,3 +435,7 @@ GALLIVM_SOURCES := \
draw/draw_llvm_sample.c \
draw/draw_pt_fetch_shade_pipeline_llvm.c \
draw/draw_vs_llvm.c
+
+RENDERONLY_SOURCES := \
+   renderonly/renderonly.c \
+   renderonly/renderonly.h
diff --git a/src/gallium/auxiliary/renderonly/renderonly.c 
b/src/gallium/auxiliary/renderonly/renderonly.c
new file mode 100644
index 000..c4ea784
--- /dev/null
+++ b/src/gallium/auxiliary/renderonly/renderonly.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2016 Christian Gmeiner 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *Christian Gmeiner 
+ */
+
+#include "renderonly/renderonly.h"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "state_tracker/drm_driver.h"
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+
+struct pipe_screen *
+renderonly_screen_create(int fd, const struct renderonly_ops *ops, void *priv)
+{
+   struct renderonly *ro;
+
+   ro = CALLOC_STRUCT(renderonly);
+   if (!ro)
+  return NULL;
+
+   ro->kms_fd = fd;
+   ro->ops = ops;
+   ro->priv = priv;
+
+   ro->screen = ops->create(ro);
+   if (!ro->screen)
+  goto cleanup;
+
+   return ro->screen;
+
+cleanup:
+   FREE(ro);
+
+   return NULL;
+}
+
+static bool
+use_kms_bumb_buffer(struct renderonly_scanout *scanout,
+  struct pipe_resource *rsc, struct renderonly *ro)
+{
+   struct winsys_handle handle;
+   int prime_fd, err;
+   struct drm_mode_create_dumb create_dumb = {
+  .width = rsc->width0,
+  .height = rsc->height0,
+  .bpp = 32,
+   };
+   struct drm_mode_destroy_dumb destroy_dumb = { };
+
+   /* create dumb buffer at scanout GPU */
+   err = ioctl(ro->kms_fd, DRM_IOCTL_MODE_CREATE_DUMB, _dumb);
+   if (err < 0) {
+  fprintf(stderr, "DRM_IOCTL_MODE_CREATE_DUMB failed: %s\n",
+strerror(errno));
+  return false;
+   }
+
+   scanout->handle = create_dumb.handle;
+   scanout->stride = create_dumb.pitch;
+
+   /* 

[Mesa-dev] [PATCH 0/3] gallium driver for Vivante GPUs

2016-11-30 Thread Christian Gmeiner
As the original patchstack is now about 300 patches, I have chosen to
squash the patches together into three different parts.

- renderonly library
  A lightweight library to add basic infrastructure for renderonly
  GPUs. With this library it is possible to run wayland or any other
  kms egl apps.

- etnaviv gallium driver
  glxgears renders so it's time to get this driver mainline.

- imx gallium driver
  Special dump driver for the imx-drm kms driver.

The renderonly library approach is a temporary workaround until 'gbm2'
is ready. I am aware that not everybody is happy about it but it helps
to increase the possible use cases like wayland and kms egl apps. Also
keep in mind that this library was only made for the embedded use case
and will not work with hybrid GPUs etc. If there are fundamental problems
with this library I will drop it and etnaviv will only work under X
(with armada-ddx from unstable-devel branch).

Some (older) results from piglit and deqp can be found here:
 https://christian-gmeiner.info/piglit
 https://christian-gmeiner.info/deqp

All individual patches can be found here:
 https://github.com/etnaviv/mesa/tree/for_mainline_v1

The squashed one can be found here:
 https://github.com/etnaviv/mesa/tree/for_mainline_v1_squashed

As this work is a collaborative effort of Lucas Stach, Philipp Zabel,
Rob Herring, Russell King, Wladimir J. van der Laan and myself, the
etnaviv gallium patch has a fictitious author, but our sign-offs.

Happy reviewing!

Christian

PS: I hope I got all Signed-off-by tags right - if not, I am sorry and
will fix it in v2 or before pushing.
PPS: As not everybody is happy about the renderonly library and as I
am the only author, I did not use the fictitious author for the other
two patches.

Christian Gmeiner (2):
  gallium: add renderonly library
  imx: gallium driver for imx-drm scanout driver

The etnaviv authors (1):
  etnaviv: gallium driver for Vivante GPUs

 configure.ac   |   21 +
 src/gallium/Automake.inc   |5 +
 src/gallium/Makefile.am|8 +
 src/gallium/auxiliary/Makefile.am  |   10 +
 src/gallium/auxiliary/Makefile.sources |4 +
 .../auxiliary/pipe-loader/pipe_loader_drm.c|   10 +
 src/gallium/auxiliary/renderonly/renderonly.c  |  199 ++
 src/gallium/auxiliary/renderonly/renderonly.h  |   81 +
 src/gallium/auxiliary/target-helpers/drm_helper.h  |   45 +
 .../auxiliary/target-helpers/drm_helper_public.h   |6 +
 src/gallium/drivers/etnaviv/.gitignore |1 +
 src/gallium/drivers/etnaviv/Automake.inc   |   11 +
 src/gallium/drivers/etnaviv/Makefile.am|   44 +
 src/gallium/drivers/etnaviv/Makefile.sources   |   26 +
 src/gallium/drivers/etnaviv/etnaviv_asm.c  |  107 +
 src/gallium/drivers/etnaviv/etnaviv_asm.h  |  133 +
 src/gallium/drivers/etnaviv/etnaviv_blend.c|  109 +
 src/gallium/drivers/etnaviv/etnaviv_blend.h|   52 +
 src/gallium/drivers/etnaviv/etnaviv_clear_blit.c   |  640 +
 src/gallium/drivers/etnaviv/etnaviv_clear_blit.h   |   48 +
 src/gallium/drivers/etnaviv/etnaviv_compiler.c | 2564 
 src/gallium/drivers/etnaviv/etnaviv_compiler.h |  120 +
 .../drivers/etnaviv/etnaviv_compiler_cmdline.c |  146 ++
 src/gallium/drivers/etnaviv/etnaviv_context.c  |  323 +++
 src/gallium/drivers/etnaviv/etnaviv_context.h  |  194 ++
 src/gallium/drivers/etnaviv/etnaviv_debug.h|   79 +
 src/gallium/drivers/etnaviv/etnaviv_disasm.c   |  612 +
 src/gallium/drivers/etnaviv/etnaviv_disasm.h   |   40 +
 src/gallium/drivers/etnaviv/etnaviv_emit.c |  770 ++
 src/gallium/drivers/etnaviv/etnaviv_emit.h |  123 +
 src/gallium/drivers/etnaviv/etnaviv_fence.c|   87 +
 src/gallium/drivers/etnaviv/etnaviv_fence.h|   39 +
 src/gallium/drivers/etnaviv/etnaviv_format.c   |  268 ++
 src/gallium/drivers/etnaviv/etnaviv_format.h   |   47 +
 src/gallium/drivers/etnaviv/etnaviv_internal.h |  259 ++
 src/gallium/drivers/etnaviv/etnaviv_query.c|  120 +
 src/gallium/drivers/etnaviv/etnaviv_query.h|   64 +
 src/gallium/drivers/etnaviv/etnaviv_query_sw.c |  123 +
 src/gallium/drivers/etnaviv/etnaviv_query_sw.h |   47 +
 src/gallium/drivers/etnaviv/etnaviv_rasterizer.c   |   81 +
 src/gallium/drivers/etnaviv/etnaviv_rasterizer.h   |   57 +
 src/gallium/drivers/etnaviv/etnaviv_resource.c |  438 
 src/gallium/drivers/etnaviv/etnaviv_resource.h |  148 ++
 src/gallium/drivers/etnaviv/etnaviv_rs.c   |  127 +
 src/gallium/drivers/etnaviv/etnaviv_rs.h   |   86 +
 src/gallium/drivers/etnaviv/etnaviv_screen.c   |  804 ++
 src/gallium/drivers/etnaviv/etnaviv_screen.h   |   94 +
 src/gallium/drivers/etnaviv/etnaviv_shader.c   |  285 +++
 src/gallium/drivers/etnaviv/etnaviv_shader.h   |   45 +
 

[Mesa-dev] [PATCH 22/23] radeonsi: shrink each vertex stream to the actually required size

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/drivers/radeonsi/si_shader.c| 26 +++--
 src/gallium/drivers/radeonsi/si_state_shaders.c | 37 +++--
 2 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index fba98e4..a0728f5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5697,59 +5697,67 @@ static void preload_ring_buffers(struct 
si_shader_context *ctx)
ctx->esgs_ring =
build_indexed_load_const(ctx, buf_ptr, offset);
}
 
if (ctx->shader->is_gs_copy_shader) {
LLVMValueRef offset = lp_build_const_int32(gallivm, 
SI_RING_GSVS);
 
ctx->gsvs_ring[0] =
build_indexed_load_const(ctx, buf_ptr, offset);
} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
+   const struct si_shader_selector *sel = ctx->shader->selector;
struct lp_build_context *uint = >soa.bld_base.uint_bld;
LLVMValueRef offset = lp_build_const_int32(gallivm, 
SI_RING_GSVS);
LLVMValueRef base_ring;
 
base_ring = build_indexed_load_const(ctx, buf_ptr, offset);
 
/* The conceptual layout of the GSVS ring is
 *   v0c0 .. vLv0 v0c1 .. vLc1 ..
 * but the real memory layout is swizzled across
 * threads:
 *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
 *   t16v0c0 ..
 * Override the buffer descriptor accordingly.
 */
LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
-   unsigned max_gsvs_emit_size = 
ctx->shader->selector->max_gsvs_emit_size;
-   unsigned num_records;
-
-   num_records = 64;
-   if (ctx->screen->b.chip_class >= VI)
-   num_records *= max_gsvs_emit_size;
+   uint64_t stream_offset = 0;
 
for (unsigned stream = 0; stream < 4; ++stream) {
+   unsigned num_components;
+   unsigned stride;
+   unsigned num_records;
LLVMValueRef ring, tmp;
 
-   if 
(!ctx->shader->selector->info.num_stream_output_components[stream])
+   num_components = 
sel->info.num_stream_output_components[stream];
+   if (!num_components)
continue;
 
+   stride = 4 * num_components * sel->gs_max_out_vertices;
+
+   num_records = 64;
+   if (ctx->screen->b.chip_class >= VI)
+   num_records *= stride;
+
ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");
tmp = LLVMBuildExtractElement(builder, ring, 
uint->zero, "");
tmp = LLVMBuildAdd(builder, tmp,
   LLVMConstInt(ctx->i64,
-   max_gsvs_emit_size * 64 
* stream, 0), "");
+   stream_offset, 0), "");
+   stream_offset += stride * 64;
+
ring = LLVMBuildInsertElement(builder, ring, tmp, 
uint->zero, "");
ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, "");
tmp = LLVMBuildExtractElement(builder, ring, uint->one, 
"");
tmp = LLVMBuildOr(builder, tmp,
LLVMConstInt(ctx->i32,
-
S_008F04_STRIDE(max_gsvs_emit_size) |
+S_008F04_STRIDE(stride) |
 S_008F04_SWIZZLE_ENABLE(1), 0), 
"");
ring = LLVMBuildInsertElement(builder, ring, tmp, 
uint->one, "");
ring = LLVMBuildInsertElement(builder, ring,
LLVMConstInt(ctx->i32, num_records, 0),
LLVMConstInt(ctx->i32, 2, 0), "");
ring = LLVMBuildInsertElement(builder, ring,
LLVMConstInt(ctx->i32,
 
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
 
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
 
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 1e9f5f0..151ed17 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -461,48 +461,55 @@ static uint32_t 

[Mesa-dev] [PATCH 17/23] radeonsi: fetch only outputs of current vertex stream from the GSVS ring

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/drivers/radeonsi/si_shader.c | 41 +++-
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8db31c7..6aaf318 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6245,61 +6245,70 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
args[8] = uint->zero; /* TFE */
 
/* Fetch the vertex stream ID.*/
LLVMValueRef stream_id;
 
if (gs_selector->so.num_outputs)
stream_id = unpack_param(, ctx.param_streamout_config, 24, 
2);
else
stream_id = uint->zero;
 
-   /* Fetch vertex data from GSVS ring */
+   /* Fill in output information. */
for (i = 0; i < gsinfo->num_outputs; ++i) {
-   unsigned chan;
-
outputs[i].semantic_name = gsinfo->output_semantic_name[i];
outputs[i].semantic_index = gsinfo->output_semantic_index[i];
 
-   for (chan = 0; chan < 4; chan++) {
+   for (int chan = 0; chan < 4; chan++) {
outputs[i].vertex_stream[chan] =
(gsinfo->output_streams[i] >> (2 * chan)) & 3;
-
-   args[2] = lp_build_const_int32(gallivm,
-  (i * 4 + chan) *
-  
gs_selector->gs_max_out_vertices * 16 * 4);
-
-   outputs[i].values[chan] =
-   LLVMBuildBitCast(gallivm->builder,
-
lp_build_intrinsic(gallivm->builder,
-
"llvm.SI.buffer.load.dword.i32.i32",
-ctx.i32, args, 
9,
-
LP_FUNC_ATTR_READONLY),
-ctx.f32, "");
}
}
 
for (int stream = 0; stream < 4; stream++) {
struct lp_build_if_state if_ctx_stream;
 
if (!gsinfo->num_stream_output_components[stream])
continue;
 
if (stream > 0 && !gs_selector->so.num_outputs)
continue;
 
LLVMValueRef is_stream =
LLVMBuildICmp(builder, LLVMIntEQ,
  stream_id,
  lp_build_const_int32(gallivm, stream), 
"");
 
lp_build_if(_ctx_stream, gallivm, is_stream);
 
+   /* Fetch vertex data from GSVS ring */
+   for (i = 0; i < gsinfo->num_outputs; ++i) {
+   for (unsigned chan = 0; chan < 4; chan++) {
+   if (outputs[i].vertex_stream[chan] != stream) {
+   outputs[i].values[chan] = 
ctx.soa.bld_base.base.undef;
+   continue;
+   }
+
+   args[2] = lp_build_const_int32(
+   gallivm,
+   (i * 4 + chan) * 
gs_selector->gs_max_out_vertices * 16 * 4);
+
+   outputs[i].values[chan] =
+   LLVMBuildBitCast(gallivm->builder,
+
lp_build_intrinsic(gallivm->builder,
+
"llvm.SI.buffer.load.dword.i32.i32",
+ctx.i32, args, 
9,
+
LP_FUNC_ATTR_READONLY),
+ctx.f32, "");
+   }
+   }
+
+   /* Streamout and exports. */
if (gs_selector->so.num_outputs) {
si_llvm_emit_streamout(, outputs,
   gsinfo->num_outputs,
   stream);
}
 
if (stream == 0)
si_llvm_export_vs(bld_base, outputs, 
gsinfo->num_outputs);
 
lp_build_endif(_ctx_stream);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/23] glsl: remember per-component vertex streams for packed varyings

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/compiler/glsl/ir.h  |  3 +++
 src/compiler/glsl/ir_print_visitor.cpp  | 14 --
 src/compiler/glsl/lower_packed_varyings.cpp |  9 +
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index 24f510e..df3ccfd 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -910,20 +910,23 @@ public:
   /**
* for glsl->tgsi/mesa IR we need to store the index into the
* parameters for uniforms, initially the code overloaded location
* but this causes problems with indirect samplers and AoA.
* This is assigned in _mesa_generate_parameters_list_for_uniforms.
*/
   int param_index;
 
   /**
* Vertex stream output identifier.
+   *
+   * For packed outputs, bit 31 is set and bits [2*i+1,2*i] indicate the
+   * stream of the i-th component.
*/
   unsigned stream;
 
   /**
* Atomic, transform feedback or block member offset.
*/
   unsigned offset;
 
   /**
* Highest element accessed with a constant expression array index
diff --git a/src/compiler/glsl/ir_print_visitor.cpp 
b/src/compiler/glsl/ir_print_visitor.cpp
index 703169e..2b77c14 100644
--- a/src/compiler/glsl/ir_print_visitor.cpp
+++ b/src/compiler/glsl/ir_print_visitor.cpp
@@ -170,37 +170,47 @@ void ir_print_visitor::visit(ir_variable *ir)
   snprintf(binding, sizeof(binding), "binding=%i ", ir->data.binding);
 
char loc[32] = {0};
if (ir->data.location != -1)
   snprintf(loc, sizeof(loc), "location=%i ", ir->data.location);
 
char component[32] = {0};
if (ir->data.explicit_component)
   snprintf(component, sizeof(component), "component=%i ", 
ir->data.location_frac);
 
+   char stream[32] = {0};
+   if (ir->data.stream & (1u << 31)) {
+  if (ir->data.stream & ~(1u << 31)) {
+ snprintf(stream, sizeof(stream), "stream(%u,%u,%u,%u)",
+  ir->data.stream & 3, (ir->data.stream >> 2) & 3,
+  (ir->data.stream >> 4) & 3, (ir->data.stream >> 6) & 3);
+  }
+   } else if (ir->data.stream) {
+  snprintf(stream, sizeof(stream), "stream%u", ir->data.stream);
+   }
+
const char *const cent = (ir->data.centroid) ? "centroid " : "";
const char *const samp = (ir->data.sample) ? "sample " : "";
const char *const patc = (ir->data.patch) ? "patch " : "";
const char *const inv = (ir->data.invariant) ? "invariant " : "";
const char *const prec = (ir->data.precise) ? "precise " : "";
const char *const mode[] = { "", "uniform ", "shader_storage ",
 "shader_shared ", "shader_in ", "shader_out ",
 "in ", "out ", "inout ",
"const_in ", "sys ", "temporary " };
STATIC_ASSERT(ARRAY_SIZE(mode) == ir_var_mode_count);
-   const char *const stream [] = {"", "stream1 ", "stream2 ", "stream3 "};
const char *const interp[] = { "", "smooth", "flat", "noperspective" };
STATIC_ASSERT(ARRAY_SIZE(interp) == INTERP_MODE_COUNT);
 
fprintf(f, "(%s%s%s%s%s%s%s%s%s%s%s) ",
binding, loc, component, cent, samp, patc, inv, prec, 
mode[ir->data.mode],
-   stream[ir->data.stream],
+   stream,
interp[ir->data.interpolation]);
 
print_type(f, ir->type);
fprintf(f, " %s)", unique_name(ir));
 }
 
 
 void ir_print_visitor::visit(ir_function_signature *ir)
 {
_mesa_symbol_table_push_scope(symbols);
diff --git a/src/compiler/glsl/lower_packed_varyings.cpp 
b/src/compiler/glsl/lower_packed_varyings.cpp
index b16f25f..7a2f187 100644
--- a/src/compiler/glsl/lower_packed_varyings.cpp
+++ b/src/compiler/glsl/lower_packed_varyings.cpp
@@ -524,20 +524,28 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue 
*rvalue,
*/
   unsigned swizzle_values[4] = { 0, 0, 0, 0 };
   unsigned components = rvalue->type->vector_elements * dmul;
   unsigned location = fine_location / 4;
   unsigned location_frac = fine_location % 4;
   for (unsigned i = 0; i < components; ++i)
  swizzle_values[i] = i + location_frac;
   ir_dereference *packed_deref =
  this->get_packed_varying_deref(location, unpacked_var, name,
 vertex_index);
+  if (unpacked_var->data.stream != 0) {
+ assert(unpacked_var->data.stream < 4);
+ ir_variable *packed_var = packed_deref->variable_referenced();
+ for (unsigned i = 0; i < components; ++i) {
+packed_var->data.stream |=
+   unpacked_var->data.stream << (2 * (location_frac + i));
+ }
+  }
   ir_swizzle *swizzle = new(this->mem_ctx)
  ir_swizzle(packed_deref, swizzle_values, components);
   if (this->mode == ir_var_shader_out) {
  this->bitwise_assign_pack(swizzle, rvalue);
   } else {
  

[Mesa-dev] [PATCH 21/23] radeonsi: use a single descriptor for the GSVS ring

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

We can hardcode all of the fields for swizzling in the geometry shader.

The advantage is that we use fewer descriptor slots and we no longer have to
update any of the (ring) descriptors when the geometry shader changes.
---
 src/gallium/drivers/radeonsi/si_pipe.h  |  1 -
 src/gallium/drivers/radeonsi/si_shader.c| 70 ++---
 src/gallium/drivers/radeonsi/si_state.h |  6 +--
 src/gallium/drivers/radeonsi/si_state_shaders.c | 37 +
 4 files changed, 64 insertions(+), 50 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 42cbecb..2409c85 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -315,21 +315,20 @@ struct si_context {
int last_primitive_restart_en;
int last_restart_index;
int last_gs_out_prim;
int last_prim;
int last_multi_vgt_param;
int last_rast_prim;
unsignedlast_sc_line_stipple;
int last_vtx_reuse_depth;
int current_rast_prim; /* primitive type after TES, 
GS */
boolgs_tri_strip_adj_fix;
-   unsignedlast_gsvs_itemsize;
 
/* Scratch buffer */
struct r600_resource*scratch_buffer;
boolemit_scratch_reloc;
unsignedscratch_waves;
unsignedspi_tmpring_size;
 
struct r600_resource*compute_scratch_buffer;
 
/* Emitted derived tessellation state. */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8c1f458..fba98e4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5672,51 +5672,105 @@ static void create_function(struct si_shader_context 
*ctx)
 }
 
 /**
  * Load ESGS and GSVS ring buffer resource descriptors and save the variables
  * for later use.
  */
 static void preload_ring_buffers(struct si_shader_context *ctx)
 {
struct gallivm_state *gallivm =
ctx->soa.bld_base.base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
 
LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
SI_PARAM_RW_BUFFERS);
 
if ((ctx->type == PIPE_SHADER_VERTEX &&
 ctx->shader->key.as_es) ||
(ctx->type == PIPE_SHADER_TESS_EVAL &&
 ctx->shader->key.as_es) ||
ctx->type == PIPE_SHADER_GEOMETRY) {
unsigned ring =
ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
 : SI_ES_RING_ESGS;
LLVMValueRef offset = lp_build_const_int32(gallivm, ring);
 
ctx->esgs_ring =
build_indexed_load_const(ctx, buf_ptr, offset);
}
 
if (ctx->shader->is_gs_copy_shader) {
-   LLVMValueRef offset = lp_build_const_int32(gallivm, 
SI_VS_RING_GSVS);
+   LLVMValueRef offset = lp_build_const_int32(gallivm, 
SI_RING_GSVS);
 
ctx->gsvs_ring[0] =
build_indexed_load_const(ctx, buf_ptr, offset);
-   }
-   if (ctx->type == PIPE_SHADER_GEOMETRY) {
-   int i;
-   for (i = 0; i < 4; i++) {
-   LLVMValueRef offset = lp_build_const_int32(gallivm, 
SI_GS_RING_GSVS0 + i);
+   } else if (ctx->type == PIPE_SHADER_GEOMETRY) {
+   struct lp_build_context *uint = >soa.bld_base.uint_bld;
+   LLVMValueRef offset = lp_build_const_int32(gallivm, 
SI_RING_GSVS);
+   LLVMValueRef base_ring;
+
+   base_ring = build_indexed_load_const(ctx, buf_ptr, offset);
+
+   /* The conceptual layout of the GSVS ring is
+*   v0c0 .. vLv0 v0c1 .. vLc1 ..
+* but the real memory layout is swizzled across
+* threads:
+*   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
+*   t16v0c0 ..
+* Override the buffer descriptor accordingly.
+*/
+   LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
+   unsigned max_gsvs_emit_size = 
ctx->shader->selector->max_gsvs_emit_size;
+   unsigned num_records;
+
+   num_records = 64;
+   if (ctx->screen->b.chip_class >= VI)
+   num_records *= max_gsvs_emit_size;
+
+   for (unsigned stream = 0; stream < 4; ++stream) {
+   LLVMValueRef ring, tmp;
+
+   if 
(!ctx->shader->selector->info.num_stream_output_components[stream])
+

[Mesa-dev] [PATCH 23/23] radeonsi: shrink the GSVS ring to account for the reduced item sizes

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 151ed17..4a7f638 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1954,21 +1954,21 @@ static bool si_update_gs_ring_buffers(struct si_context 
*sctx)
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
 
/* Calculate the minimum size. */
unsigned min_esgs_ring_size = align(es->esgs_itemsize * gs_vertex_reuse 
*
wave_size, alignment);
 
/* These are recommended sizes, not minimum sizes. */
unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
  es->esgs_itemsize * 
gs->gs_input_verts_per_prim;
unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
- gs->max_gsvs_emit_size * (gs->max_gs_stream + 
1);
+ gs->max_gsvs_emit_size;
 
min_esgs_ring_size = align(min_esgs_ring_size, alignment);
esgs_ring_size = align(esgs_ring_size, alignment);
gsvs_ring_size = align(gsvs_ring_size, alignment);
 
esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
 
/* Some rings don't have to be allocated if shaders don't use them.
 * (e.g. no varyings between ES and GS or GS and VS)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/23] radeonsi: pull iteration over vertex streams into GS copy shader logic

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

The iteration is not needed for normal vertex shaders.
---
 src/gallium/drivers/radeonsi/si_shader.c | 62 +++-
 1 file changed, 37 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index fd2ed42..cd2fd09 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2169,64 +2169,50 @@ static void emit_streamout_output(struct 
si_shader_context *ctx,
break;
}
 
build_tbuffer_store_dwords(ctx, so_buffers[buf_idx],
   vdata, num_comps,
   so_write_offsets[buf_idx],
   LLVMConstInt(ctx->i32, 0, 0),
   stream_out->dst_offset * 4);
 }
 
-/* On SI, the vertex shader is responsible for writing streamout data
- * to buffers. */
+/**
+ * Write streamout data to buffers for vertex stream @p stream (different
+ * vertex streams can occur for GS copy shaders).
+ */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
   struct si_shader_output_values *outputs,
-  unsigned noutput)
+  unsigned noutput, unsigned stream)
 {
struct si_shader_selector *sel = ctx->shader->selector;
struct pipe_stream_output_info *so = >so;
struct gallivm_state *gallivm = >gallivm;
LLVMBuilderRef builder = gallivm->builder;
int i;
struct lp_build_if_state if_ctx;
 
/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
LLVMValueRef so_vtx_count =
unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 
LLVMValueRef tid = get_thread_id(ctx);
 
/* can_emit = tid < so_vtx_count; */
LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
 
-   LLVMValueRef stream_id =
-   unpack_param(ctx, ctx->param_streamout_config, 24, 2);
-
/* Emit the streamout code conditionally. This actually avoids
 * out-of-bounds buffer access. The hw tells us via the SGPR
 * (so_vtx_count) which threads are allowed to emit streamout data. */
lp_build_if(_ctx, gallivm, can_emit);
-
-   for (int stream = 0; stream < 4; ++stream) {
-   struct lp_build_if_state if_ctx_stream;
-
-   if (!sel->info.num_stream_output_components[stream])
-   continue;
-
-   LLVMValueRef is_stream =
-   LLVMBuildICmp(builder, LLVMIntEQ,
- stream_id,
- lp_build_const_int32(gallivm, stream), 
"");
-
-   lp_build_if(_ctx_stream, gallivm, is_stream);
-
+   {
/* The buffer offset is computed as follows:
 *   ByteOffset = streamout_offset[buffer_id]*4 +
 *(streamout_write_index + 
thread_id)*stride[buffer_id] +
 *attrib_offset
  */
 
LLVMValueRef so_write_index =
LLVMGetParam(ctx->main_fn,
 ctx->param_streamout_write_index);
 
@@ -2264,22 +2250,20 @@ static void si_llvm_emit_streamout(struct 
si_shader_context *ctx,
 
if (reg >= noutput)
continue;
 
if (stream != so->output[i].stream)
continue;
 
emit_streamout_output(ctx, so_buffers, so_write_offset,
  >output[i], [reg]);
}
-
-   lp_build_endif(_ctx_stream);
}
lp_build_endif(_ctx);
 }
 
 
 /* Generate export instructions for hardware VS shader stage */
 static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
  struct si_shader_output_values *outputs,
  unsigned noutput)
 {
@@ -2828,21 +2812,21 @@ static void si_llvm_emit_vs_epilogue(struct 
lp_build_tgsi_context *bld_base)
 
/* Return the primitive ID from the LLVM function. */
ctx->return_value =
LLVMBuildInsertValue(gallivm->builder,
 ctx->return_value,
 bitcast(bld_base, TGSI_TYPE_FLOAT,
 get_primitive_id(bld_base, 0)),
 VS_EPILOG_PRIMID_LOC, "");
 
if (ctx->shader->selector->so.num_outputs)
-   si_llvm_emit_streamout(ctx, outputs, i);
+   si_llvm_emit_streamout(ctx, outputs, i, 0);
si_llvm_export_vs(bld_base, outputs, i);
FREE(outputs);
 }
 
 struct si_ps_exports {

[Mesa-dev] [PATCH 05/23] st/glsl_to_tgsi: plumb the GS output stream qualifier through to TGSI

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Allow drivers to emit GS outputs in a smarter way.
---
 src/gallium/auxiliary/tgsi/tgsi_ureg.c | 16 +++-
 src/gallium/auxiliary/tgsi/tgsi_ureg.h |  1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 ++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 7bcd242..9a6f8e5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -120,20 +120,21 @@ struct ureg_program
 
struct {
   unsigned semantic_name;
   unsigned semantic_index;
} system_value[UREG_MAX_SYSTEM_VALUE];
unsigned nr_system_values;
 
struct {
   unsigned semantic_name;
   unsigned semantic_index;
+  unsigned streams;
   unsigned usage_mask; /* = TGSI_WRITEMASK_* */
   unsigned first;
   unsigned last;
   unsigned array_id;
} output[UREG_MAX_OUTPUT];
unsigned nr_outputs, nr_output_regs;
 
struct {
   union {
  float f[4];
@@ -402,43 +403,46 @@ ureg_DECL_system_value(struct ureg_program *ureg,
 
 out:
return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i);
 }
 
 
 struct ureg_dst
 ureg_DECL_output_layout(struct ureg_program *ureg,
 unsigned semantic_name,
 unsigned semantic_index,
+unsigned streams,
 unsigned index,
 unsigned usage_mask,
 unsigned array_id,
 unsigned array_size)
 {
unsigned i;
 
assert(usage_mask != 0);
 
for (i = 0; i < ureg->nr_outputs; i++) {
   if (ureg->output[i].semantic_name == semantic_name &&
   ureg->output[i].semantic_index == semantic_index) {
+ assert(ureg->output[i].streams == streams);
  if (ureg->output[i].array_id == array_id) {
 ureg->output[i].usage_mask |= usage_mask;
 goto out;
  }
  assert((ureg->output[i].usage_mask & usage_mask) == 0);
   }
}
 
if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
   ureg->output[i].semantic_name = semantic_name;
   ureg->output[i].semantic_index = semantic_index;
+  ureg->output[i].streams = streams;
   ureg->output[i].usage_mask = usage_mask;
   ureg->output[i].first = index;
   ureg->output[i].last = index + array_size - 1;
   ureg->output[i].array_id = array_id;
   ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index + array_size);
   ureg->nr_outputs++;
}
else {
   set_bad( ureg );
}
@@ -450,21 +454,21 @@ out:
 
 
 struct ureg_dst
 ureg_DECL_output_masked(struct ureg_program *ureg,
 unsigned name,
 unsigned index,
 unsigned usage_mask,
 unsigned array_id,
 unsigned array_size)
 {
-   return ureg_DECL_output_layout(ureg, name, index,
+   return ureg_DECL_output_layout(ureg, name, index, 0,
   ureg->nr_output_regs, usage_mask, array_id, 
array_size);
 }
 
 
 struct ureg_dst 
 ureg_DECL_output(struct ureg_program *ureg,
  unsigned name,
  unsigned index)
 {
return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW,
@@ -1547,40 +1551,45 @@ ureg_memory_insn(struct ureg_program *ureg,
 }
 
 
 static void
 emit_decl_semantic(struct ureg_program *ureg,
unsigned file,
unsigned first,
unsigned last,
unsigned semantic_name,
unsigned semantic_index,
+   unsigned streams,
unsigned usage_mask,
unsigned array_id)
 {
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
 
out[0].value = 0;
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 3;
out[0].decl.File = file;
out[0].decl.UsageMask = usage_mask;
out[0].decl.Semantic = 1;
out[0].decl.Array = array_id != 0;
 
out[1].value = 0;
out[1].decl_range.First = first;
out[1].decl_range.Last = last;
 
out[2].value = 0;
out[2].decl_semantic.Name = semantic_name;
out[2].decl_semantic.Index = semantic_index;
+   out[2].decl_semantic.StreamX = streams & 3;
+   out[2].decl_semantic.StreamY = (streams >> 2) & 3;
+   out[2].decl_semantic.StreamZ = (streams >> 4) & 3;
+   out[2].decl_semantic.StreamW = (streams >> 6) & 3;
 
if (array_id) {
   out[3].value = 0;
   out[3].array.ArrayID = array_id;
}
 }
 
 
 static void
 emit_decl_fs(struct ureg_program *ureg,
@@ -1871,70 +1880,75 @@ static void emit_decls( struct ureg_program *ureg )
   }
} else {
   if (ureg->supports_any_inout_decl_range) {
  for (i = 0; i < ureg->nr_inputs; i++) {
 emit_decl_semantic(ureg,
   

[Mesa-dev] [PATCH 16/23] radeonsi: only export from GS copy shader for vertex stream 0

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

When running the copy shader for vertex streams != 0, the SX does not need
any data from us (there is no rasterization for the higher vertex streams,
only streamout).
---
 src/gallium/drivers/radeonsi/si_shader.c | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b2cf44a..8db31c7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6270,40 +6270,47 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
outputs[i].values[chan] =
LLVMBuildBitCast(gallivm->builder,
 
lp_build_intrinsic(gallivm->builder,
 
"llvm.SI.buffer.load.dword.i32.i32",
 ctx.i32, args, 
9,
 
LP_FUNC_ATTR_READONLY),
 ctx.f32, "");
}
}
 
-   if (gs_selector->so.num_outputs) {
-   for (int stream = 0; stream < 4; stream++) {
-   struct lp_build_if_state if_ctx_stream;
+   for (int stream = 0; stream < 4; stream++) {
+   struct lp_build_if_state if_ctx_stream;
 
-   if (!gsinfo->num_stream_output_components[stream])
-   continue;
+   if (!gsinfo->num_stream_output_components[stream])
+   continue;
+
+   if (stream > 0 && !gs_selector->so.num_outputs)
+   continue;
 
-   LLVMValueRef is_stream =
-   LLVMBuildICmp(builder, LLVMIntEQ,
- stream_id,
- lp_build_const_int32(gallivm, 
stream), "");
+   LLVMValueRef is_stream =
+   LLVMBuildICmp(builder, LLVMIntEQ,
+ stream_id,
+ lp_build_const_int32(gallivm, stream), 
"");
 
-   lp_build_if(_ctx_stream, gallivm, is_stream);
+   lp_build_if(_ctx_stream, gallivm, is_stream);
+
+   if (gs_selector->so.num_outputs) {
si_llvm_emit_streamout(, outputs,
   gsinfo->num_outputs,
   stream);
-   lp_build_endif(_ctx_stream);
}
+
+   if (stream == 0)
+   si_llvm_export_vs(bld_base, outputs, 
gsinfo->num_outputs);
+
+   lp_build_endif(_ctx_stream);
}
-   si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
 
LLVMBuildRetVoid(gallivm->builder);
 
/* Dump LLVM IR before any optimization passes */
if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
r600_can_dump_shader(>b, PIPE_SHADER_GEOMETRY))
LLVMDumpModule(bld_base->base.gallivm->module);
 
si_llvm_finalize_module(,
r600_extra_shader_checks(>b, PIPE_SHADER_GEOMETRY));
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/23] gallium: extract individual streamout output structure

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

So that we can pass pointers to individual array entries around.
---
 src/gallium/include/pipe/p_state.h | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/gallium/include/pipe/p_state.h 
b/src/gallium/include/pipe/p_state.h
index 46df196..6d42b4c 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -188,42 +188,47 @@ struct pipe_scissor_state
unsigned maxx:16;
unsigned maxy:16;
 };
 
 
 struct pipe_clip_state
 {
float ucp[PIPE_MAX_CLIP_PLANES][4];
 };
 
+/**
+ * A single output for vertex transform feedback.
+ */
+struct pipe_stream_output
+{
+   unsigned register_index:8;  /**< 0 to PIPE_MAX_SHADER_OUTPUTS */
+   unsigned start_component:2; /** 0 to 3 */
+   unsigned num_components:3;  /** 1 to 4 */
+   unsigned output_buffer:3;   /**< 0 to PIPE_MAX_SO_BUFFERS */
+   unsigned dst_offset:16; /**< offset into the buffer in dwords */
+   unsigned stream:2;  /**< 0 to 3 */
+};
 
 /**
  * Stream output for vertex transform feedback.
  */
 struct pipe_stream_output_info
 {
unsigned num_outputs;
/** stride for an entire vertex for each buffer in dwords */
unsigned stride[PIPE_MAX_SO_BUFFERS];
 
/**
 * Array of stream outputs, in the order they are to be written in.
 * Selected components are tightly packed into the output buffer.
 */
-   struct {
-  unsigned register_index:8;  /**< 0 to PIPE_MAX_SHADER_OUTPUTS */
-  unsigned start_component:2; /** 0 to 3 */
-  unsigned num_components:3;  /** 1 to 4 */
-  unsigned output_buffer:3;   /**< 0 to PIPE_MAX_SO_BUFFERS */
-  unsigned dst_offset:16; /**< offset into the buffer in dwords */
-  unsigned stream:2;  /**< 0 to 3 */
-   } output[PIPE_MAX_SO_OUTPUTS];
+   struct pipe_stream_output output[PIPE_MAX_SO_OUTPUTS];
 };
 
 /**
  * The 'type' parameter identifies whether the shader state contains TGSI
  * tokens, etc.  If the driver returns 'PIPE_SHADER_IR_TGSI' for the
  * 'PIPE_SHADER_CAP_PREFERRED_IR' shader param, the ir will *always* be
  * 'PIPE_SHADER_IR_TGSI' and the tokens ptr will be valid.  If the driver
  * requests a different 'pipe_shader_ir' type, then it must check the 'type'
  * enum to see if it is getting TGSI tokens or its preferred IR.
  *
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/23] radeonsi: pack GS output components for each vertex stream contiguously

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Note that the memory layout of one vertex stream inside one "item" (= memory
written by one GS wave) on the GSVS ring is:

  t0v0c0 ... t15v0c0 t0v1c0 ... t15v1c0 ... t0vLc0 ... t15vLc0
  t0v0c1 ... t15v0c1 t0v1c1 ... t15v1c1 ... t0vLc1 ... t15vLc1
...
  t0v0cL ... t15v0cL t0v1cL ... t15v1cL ... t0vLcL ... t15vLcL
  t16v0c0 ... t31v0c0 t16v1c0 ... t31v1c0 ... t16vLc0 ... t31vLc0
  t16v0c1 ... t31v0c1 t16v1c1 ... t31v1c1 ... t16vLc1 ... t31vLc1
...
  t16v0cL ... t31v0cL t16v1cL ... t31v1cL ... t16vLcL ... t31vLcL

...

  t48v0c0 ... t63v0c0 t48v1c0 ... t63v1c0 ... t48vLc0 ... t63vLc0
  t48v0c1 ... t63v0c1 t48v1c1 ... t63v1c1 ... t48vLc1 ... t63vLc1
...
  t48v0cL ... t63v0cL t48v1cL ... t63v1cL ... t48vLcL ... t63vLcL

where tNN indicates the thread number, vNN the vertex number (in the order of
EMIT_VERTEX), and cNN the output component (vL and cL are the last vertex and
component, respectively).

The vertex streams are laid out sequentially.

The swizzling by 16 threads is hard-coded in the way the VGT generates the
offset passed into the GS copy shader, and the jump every 16 threads is
calculated from VGT_GSVS_RING_OFFSET_n and VGT_GSVS_RING_ITEMSIZE in a way
that makes it difficult to deviate from this layout (at least that's what
I've experimentally confirmed on VI after first trying to go the simpler
route of just interleaving the vertex streams).
---
 src/gallium/drivers/radeonsi/si_shader.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b861831..8c1f458 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5137,21 +5137,21 @@ static void si_llvm_emit_vertex(
struct lp_build_context *uint = _base->uint_bld;
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = >selector->info;
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct lp_build_if_state if_state;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
SI_PARAM_GS2VS_OFFSET);
LLVMValueRef gs_next_vertex;
LLVMValueRef can_emit, kill;
LLVMValueRef args[2];
-   unsigned chan;
+   unsigned chan, offset;
int i;
unsigned stream;
 
stream = si_llvm_get_stream(bld_base, emit_data);
 
/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(gallivm->builder,
   ctx->gs_next_vertex[stream],
   "");
 
@@ -5172,32 +5172,34 @@ static void si_llvm_emit_vertex(
kill = lp_build_select(_base->base, can_emit,
   lp_build_const_float(gallivm, 1.0f),
   lp_build_const_float(gallivm, -1.0f));
 
lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
   ctx->voidt, , 1, 0);
} else {
lp_build_if(_state, gallivm, can_emit);
}
 
+   offset = 0;
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr =
ctx->soa.outputs[i];
 
for (chan = 0; chan < 4; chan++) {
if (((info->output_streams[i] >> (2 * chan)) & 3) != 
stream)
continue;
 
LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, 
out_ptr[chan], "");
LLVMValueRef voffset =
-   lp_build_const_int32(gallivm, (i * 4 + chan) *
+   lp_build_const_int32(gallivm, offset *
 
shader->selector->gs_max_out_vertices);
+   offset++;
 
voffset = lp_build_add(uint, voffset, gs_next_vertex);
voffset = lp_build_mul_imm(uint, voffset, 4);
 
out_val = LLVMBuildBitCast(gallivm->builder, out_val, 
ctx->i32, "");
 
build_tbuffer_store(ctx,
ctx->gsvs_ring[stream],
out_val, 1,
voffset, soffset, 0,
@@ -6267,42 +6269,45 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
}
 
LLVMBasicBlockRef end_bb;
LLVMValueRef switch_inst;
 
end_bb = LLVMAppendBasicBlockInContext(gallivm->context, ctx.main_fn, 
"end");
switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4);
 
for (int stream = 0; stream < 4; stream++) {
LLVMBasicBlockRef bb;
+   unsigned offset;
 
if 

[Mesa-dev] [PATCH 15/23] radeonsi: do not export VS outputs from vertex streams != 0

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

This affects GS copy shaders. When an output is meant for vertex
stream != 0, then we don't have to make it available to the pixel
shader.

There is a minor inefficiency here because the GLSL varying packing pass
does not group varyings of the same vertex stream together, but it
shouldn't be important in practice.
---
 src/gallium/drivers/radeonsi/si_shader.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index cd2fd09..b2cf44a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2298,20 +2298,26 @@ static void si_llvm_export_vs(struct 
lp_build_tgsi_context *bld_base,
(1ull << 
si_shader_io_get_unique_index(semantic_name, semantic_index)))
export_param = false;
break;
default:
if (shader->key.opt.hw_vs.kill_outputs2 &
(1u << 
si_shader_io_get_unique_index2(semantic_name, semantic_index)))
export_param = false;
break;
}
 
+   if (outputs[i].vertex_stream[0] != 0 &&
+   outputs[i].vertex_stream[1] != 0 &&
+   outputs[i].vertex_stream[2] != 0 &&
+   outputs[i].vertex_stream[3] != 0)
+   export_param = false;
+
 handle_semantic:
/* Select the correct target */
switch(semantic_name) {
case TGSI_SEMANTIC_PSIZE:
psize_value = outputs[i].values[0];
continue;
case TGSI_SEMANTIC_EDGEFLAG:
edgeflag_value = outputs[i].values[0];
continue;
case TGSI_SEMANTIC_LAYER:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/23] radeonsi: group streamout writes by vertex stream

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/drivers/radeonsi/si_shader.c | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index d26c36a..fd2ed42 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2137,20 +2137,22 @@ static void emit_streamout_output(struct 
si_shader_context *ctx,
unsigned start = stream_out->start_component;
unsigned num_comps = stream_out->num_components;
LLVMValueRef out[4];
 
assert(num_comps && num_comps <= 4);
if (!num_comps || num_comps > 4)
return;
 
/* Load the output as int. */
for (int j = 0; j < num_comps; j++) {
+   assert(stream_out->stream == shader_out->vertex_stream[start + 
j]);
+
out[j] = LLVMBuildBitCast(builder,
  shader_out->values[start + j],
ctx->i32, "");
}
 
/* Pack the output. */
LLVMValueRef vdata = NULL;
 
switch (num_comps) {
case 1: /* as i32 */
@@ -2173,21 +2175,22 @@ static void emit_streamout_output(struct 
si_shader_context *ctx,
   LLVMConstInt(ctx->i32, 0, 0),
   stream_out->dst_offset * 4);
 }
 
 /* On SI, the vertex shader is responsible for writing streamout data
  * to buffers. */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
   struct si_shader_output_values *outputs,
   unsigned noutput)
 {
-   struct pipe_stream_output_info *so = >shader->selector->so;
+   struct si_shader_selector *sel = ctx->shader->selector;
+   struct pipe_stream_output_info *so = >so;
struct gallivm_state *gallivm = >gallivm;
LLVMBuilderRef builder = gallivm->builder;
int i;
struct lp_build_if_state if_ctx;
 
/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
LLVMValueRef so_vtx_count =
unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 
LLVMValueRef tid = get_thread_id(ctx);
@@ -2196,21 +2199,34 @@ static void si_llvm_emit_streamout(struct 
si_shader_context *ctx,
LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
 
LLVMValueRef stream_id =
unpack_param(ctx, ctx->param_streamout_config, 24, 2);
 
/* Emit the streamout code conditionally. This actually avoids
 * out-of-bounds buffer access. The hw tells us via the SGPR
 * (so_vtx_count) which threads are allowed to emit streamout data. */
lp_build_if(_ctx, gallivm, can_emit);
-   {
+
+   for (int stream = 0; stream < 4; ++stream) {
+   struct lp_build_if_state if_ctx_stream;
+
+   if (!sel->info.num_stream_output_components[stream])
+   continue;
+
+   LLVMValueRef is_stream =
+   LLVMBuildICmp(builder, LLVMIntEQ,
+ stream_id,
+ lp_build_const_int32(gallivm, stream), 
"");
+
+   lp_build_if(_ctx_stream, gallivm, is_stream);
+
/* The buffer offset is computed as follows:
 *   ByteOffset = streamout_offset[buffer_id]*4 +
 *(streamout_write_index + 
thread_id)*stride[buffer_id] +
 *attrib_offset
  */
 
LLVMValueRef so_write_index =
LLVMGetParam(ctx->main_fn,
 ctx->param_streamout_write_index);
 
@@ -2238,36 +2254,32 @@ static void si_llvm_emit_streamout(struct 
si_shader_context *ctx,
so_offset = LLVMBuildMul(builder, so_offset, 
LLVMConstInt(ctx->i32, 4, 0), "");
 
so_write_offset[i] = LLVMBuildMul(builder, 
so_write_index,
  
LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
so_write_offset[i] = LLVMBuildAdd(builder, 
so_write_offset[i], so_offset, "");
}
 
/* Write streamout data. */
for (i = 0; i < so->num_outputs; i++) {
unsigned reg = so->output[i].register_index;
-   unsigned stream = so->output[i].stream;
-   struct lp_build_if_state if_ctx_stream;
 
if (reg >= noutput)
continue;
 
-   LLVMValueRef can_emit_stream =
-   LLVMBuildICmp(builder, LLVMIntEQ,
- stream_id,
- 

[Mesa-dev] [PATCH 11/23] radeonsi: extract writing of a single streamout output

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/drivers/radeonsi/si_shader.c | 91 ++--
 1 file changed, 52 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4e61d73..1cda59c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2118,30 +2118,79 @@ static void si_dump_streamout(struct 
pipe_stream_output_info *so)
i, so->output[i].output_buffer,
so->output[i].dst_offset, so->output[i].dst_offset + 
so->output[i].num_components - 1,
so->output[i].register_index,
mask & 1 ? "x" : "",
mask & 2 ? "y" : "",
mask & 4 ? "z" : "",
mask & 8 ? "w" : "");
}
 }
 
+static void emit_streamout_output(struct si_shader_context *ctx,
+ LLVMValueRef const *so_buffers,
+ LLVMValueRef const *so_write_offsets,
+ struct pipe_stream_output *stream_out,
+ struct si_shader_output_values *shader_out)
+{
+   struct gallivm_state *gallivm = >gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   unsigned buf_idx = stream_out->output_buffer;
+   unsigned start = stream_out->start_component;
+   unsigned num_comps = stream_out->num_components;
+   LLVMValueRef out[4];
+
+   assert(num_comps && num_comps <= 4);
+   if (!num_comps || num_comps > 4)
+   return;
+
+   /* Load the output as int. */
+   for (int j = 0; j < num_comps; j++) {
+   out[j] = LLVMBuildBitCast(builder,
+ shader_out->values[start + j],
+   ctx->i32, "");
+   }
+
+   /* Pack the output. */
+   LLVMValueRef vdata = NULL;
+
+   switch (num_comps) {
+   case 1: /* as i32 */
+   vdata = out[0];
+   break;
+   case 2: /* as v2i32 */
+   case 3: /* as v4i32 (aligned to 4) */
+   case 4: /* as v4i32 */
+   vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, 
util_next_power_of_two(num_comps)));
+   for (int j = 0; j < num_comps; j++) {
+   vdata = LLVMBuildInsertElement(builder, vdata, out[j],
+  LLVMConstInt(ctx->i32, 
j, 0), "");
+   }
+   break;
+   }
+
+   build_tbuffer_store_dwords(ctx, so_buffers[buf_idx],
+  vdata, num_comps,
+  so_write_offsets[buf_idx],
+  LLVMConstInt(ctx->i32, 0, 0),
+  stream_out->dst_offset * 4);
+}
+
 /* On SI, the vertex shader is responsible for writing streamout data
  * to buffers. */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
   struct si_shader_output_values *outputs,
   unsigned noutput)
 {
struct pipe_stream_output_info *so = >shader->selector->so;
struct gallivm_state *gallivm = >gallivm;
LLVMBuilderRef builder = gallivm->builder;
-   int i, j;
+   int i;
struct lp_build_if_state if_ctx;
LLVMValueRef so_buffers[4];
LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
SI_PARAM_RW_BUFFERS);
 
/* Load the descriptors. */
for (i = 0; i < 4; ++i) {
if (ctx->shader->selector->so.stride[i]) {
LLVMValueRef offset = lp_build_const_int32(gallivm,
   
SI_VS_STREAMOUT_BUF0 + i);
@@ -2191,71 +2240,35 @@ static void si_llvm_emit_streamout(struct 
si_shader_context *ctx,
  
ctx->param_streamout_offset[i]);
so_offset = LLVMBuildMul(builder, so_offset, 
LLVMConstInt(ctx->i32, 4, 0), "");
 
so_write_offset[i] = LLVMBuildMul(builder, 
so_write_index,
  
LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
so_write_offset[i] = LLVMBuildAdd(builder, 
so_write_offset[i], so_offset, "");
}
 
/* Write streamout data. */
for (i = 0; i < so->num_outputs; i++) {
-   unsigned buf_idx = so->output[i].output_buffer;
unsigned reg = so->output[i].register_index;
-   unsigned start = so->output[i].start_component;
-   unsigned num_comps = so->output[i].num_components;
unsigned stream = so->output[i].stream;
- 

[Mesa-dev] [PATCH 09/23] radeonsi: plumb the output vertex_stream through to si_shader_output_values

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/drivers/radeonsi/si_shader.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index c488f91..b420a7b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -50,20 +50,21 @@ static const char *scratch_rsrc_dword0_symbol =
"SCRATCH_RSRC_DWORD0";
 
 static const char *scratch_rsrc_dword1_symbol =
"SCRATCH_RSRC_DWORD1";
 
 struct si_shader_output_values
 {
LLVMValueRef values[4];
unsigned semantic_name;
unsigned semantic_index;
+   ubyte vertex_stream[4];
 };
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
   struct si_screen *sscreen,
   struct si_shader *shader,
   LLVMTargetMachineRef tm);
 
 static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
 struct lp_build_tgsi_context *bld_base,
 struct lp_build_emit_data *emit_data);
@@ -2789,25 +2790,29 @@ static void si_llvm_emit_vs_epilogue(struct 
lp_build_tgsi_context *bld_base)
}
 
if (cond)
lp_build_endif(_ctx);
}
 
for (i = 0; i < info->num_outputs; i++) {
outputs[i].semantic_name = info->output_semantic_name[i];
outputs[i].semantic_index = info->output_semantic_index[i];
 
-   for (j = 0; j < 4; j++)
+   for (j = 0; j < 4; j++) {
outputs[i].values[j] =
LLVMBuildLoad(gallivm->builder,
  ctx->soa.outputs[i][j],
  "");
+   outputs[i].vertex_stream[j] =
+   (info->output_streams[i] >> (2 * j)) & 3;
+   }
+
}
 
/* Return the primitive ID from the LLVM function. */
ctx->return_value =
LLVMBuildInsertValue(gallivm->builder,
 ctx->return_value,
 bitcast(bld_base, TGSI_TYPE_FLOAT,
 get_primitive_id(bld_base, 0)),
 VS_EPILOG_PRIMID_LOC, "");
 
@@ -6227,20 +6232,23 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
args[8] = uint->zero; /* TFE */
 
/* Fetch vertex data from GSVS ring */
for (i = 0; i < gsinfo->num_outputs; ++i) {
unsigned chan;
 
outputs[i].semantic_name = gsinfo->output_semantic_name[i];
outputs[i].semantic_index = gsinfo->output_semantic_index[i];
 
for (chan = 0; chan < 4; chan++) {
+   outputs[i].vertex_stream[chan] =
+   (gsinfo->output_streams[i] >> (2 * chan)) & 3;
+
args[2] = lp_build_const_int32(gallivm,
   (i * 4 + chan) *
   
gs_selector->gs_max_out_vertices * 16 * 4);
 
outputs[i].values[chan] =
LLVMBuildBitCast(gallivm->builder,
 
lp_build_intrinsic(gallivm->builder,
 
"llvm.SI.buffer.load.dword.i32.i32",
 ctx.i32, args, 
9,
 
LP_FUNC_ATTR_READONLY),
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/23] radeonsi: do not kill GS with memory writes

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Vertex emits beyond the specified maximum number of vertices are supposed to
have no effect, which is why we used to always kill GS that reached the limit.

However, if the GS also writes to memory (SSBO, atomics, shader images), then
we must keep going and only skip the vertex emit itself.

Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_shader.c | 30 ++
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 1e3be62..aac3091 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5116,50 +5116,60 @@ static unsigned si_llvm_get_stream(struct 
lp_build_tgsi_context *bld_base,
 static void si_llvm_emit_vertex(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *uint = _base->uint_bld;
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = >selector->info;
struct gallivm_state *gallivm = bld_base->base.gallivm;
+   struct lp_build_if_state if_state;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
SI_PARAM_GS2VS_OFFSET);
LLVMValueRef gs_next_vertex;
LLVMValueRef can_emit, kill;
LLVMValueRef args[2];
unsigned chan;
int i;
unsigned stream;
 
stream = si_llvm_get_stream(bld_base, emit_data);
 
/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(gallivm->builder,
   ctx->gs_next_vertex[stream],
   "");
 
/* If this thread has already emitted the declared maximum number of
-* vertices, kill it: excessive vertex emissions are not supposed to
-* have any effect, and GS threads have no externally observable
-* effects other than emitting vertices.
+* vertices, skip the write: excessive vertex emissions are not
+* supposed to have any effect.
+*
+* If the shader has no writes to memory, kill it instead. This skips
+* further memory loads and may allow LLVM to skip to the end
+* altogether.
 */
can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex,
 lp_build_const_int32(gallivm,
  
shader->selector->gs_max_out_vertices), "");
-   kill = lp_build_select(_base->base, can_emit,
-  lp_build_const_float(gallivm, 1.0f),
-  lp_build_const_float(gallivm, -1.0f));
 
-   lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-  ctx->voidt, , 1, 0);
+   bool use_kill = !info->writes_memory;
+   if (use_kill) {
+   kill = lp_build_select(_base->base, can_emit,
+  lp_build_const_float(gallivm, 1.0f),
+  lp_build_const_float(gallivm, -1.0f));
+
+   lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+  ctx->voidt, , 1, 0);
+   } else {
+   lp_build_if(_state, gallivm, can_emit);
+   }
 
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr =
ctx->soa.outputs[i];
 
for (chan = 0; chan < 4; chan++) {
LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, 
out_ptr[chan], "");
LLVMValueRef voffset =
lp_build_const_int32(gallivm, (i * 4 + chan) *
 
shader->selector->gs_max_out_vertices);
@@ -5171,30 +5181,34 @@ static void si_llvm_emit_vertex(
 
build_tbuffer_store(ctx,
ctx->gsvs_ring[stream],
out_val, 1,
voffset, soffset, 0,
V_008F0C_BUF_DATA_FORMAT_32,
V_008F0C_BUF_NUM_FORMAT_UINT,
1, 0, 1, 1, 0);
}
}
+
gs_next_vertex = lp_build_add(uint, gs_next_vertex,
  lp_build_const_int32(gallivm, 1));
 
LLVMBuildStore(gallivm->builder, gs_next_vertex, 
ctx->gs_next_vertex[stream]);
 
/* Signal vertex emission */
args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS 
| (stream << 8));
args[1] = 

[Mesa-dev] [PATCH 18/23] radeonsi: generate an explicit switch instruction over vertex streams

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

SimplifyCFG generates a switch instruction anyway when all four streams
are present, but is simultaneously not smart enough to eliminate some
redundant jumps that it generates.

The generated assembly is still a bit silly, probably because the
control flow annotation doesn't know how to handle a switch with a uniform
condition.
---
 src/gallium/drivers/radeonsi/si_shader.c | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 6aaf318..0eeff19 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6256,35 +6256,38 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
for (i = 0; i < gsinfo->num_outputs; ++i) {
outputs[i].semantic_name = gsinfo->output_semantic_name[i];
outputs[i].semantic_index = gsinfo->output_semantic_index[i];
 
for (int chan = 0; chan < 4; chan++) {
outputs[i].vertex_stream[chan] =
(gsinfo->output_streams[i] >> (2 * chan)) & 3;
}
}
 
+   LLVMBasicBlockRef end_bb;
+   LLVMValueRef switch_inst;
+
+   end_bb = LLVMAppendBasicBlockInContext(gallivm->context, ctx.main_fn, 
"end");
+   switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4);
+
for (int stream = 0; stream < 4; stream++) {
-   struct lp_build_if_state if_ctx_stream;
+   LLVMBasicBlockRef bb;
 
if (!gsinfo->num_stream_output_components[stream])
continue;
 
if (stream > 0 && !gs_selector->so.num_outputs)
continue;
 
-   LLVMValueRef is_stream =
-   LLVMBuildICmp(builder, LLVMIntEQ,
- stream_id,
- lp_build_const_int32(gallivm, stream), 
"");
-
-   lp_build_if(_ctx_stream, gallivm, is_stream);
+   bb = LLVMInsertBasicBlockInContext(gallivm->context, end_bb, 
"out");
+   LLVMAddCase(switch_inst, lp_build_const_int32(gallivm, stream), 
bb);
+   LLVMPositionBuilderAtEnd(builder, bb);
 
/* Fetch vertex data from GSVS ring */
for (i = 0; i < gsinfo->num_outputs; ++i) {
for (unsigned chan = 0; chan < 4; chan++) {
if (outputs[i].vertex_stream[chan] != stream) {
outputs[i].values[chan] = 
ctx.soa.bld_base.base.undef;
continue;
}
 
args[2] = lp_build_const_int32(
@@ -6304,23 +6307,25 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
/* Streamout and exports. */
if (gs_selector->so.num_outputs) {
si_llvm_emit_streamout(, outputs,
   gsinfo->num_outputs,
   stream);
}
 
if (stream == 0)
si_llvm_export_vs(bld_base, outputs, 
gsinfo->num_outputs);
 
-   lp_build_endif(_ctx_stream);
+   LLVMBuildBr(builder, end_bb);
}
 
+   LLVMPositionBuilderAtEnd(builder, end_bb);
+
LLVMBuildRetVoid(gallivm->builder);
 
/* Dump LLVM IR before any optimization passes */
if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
r600_can_dump_shader(>b, PIPE_SHADER_GEOMETRY))
LLVMDumpModule(bld_base->base.gallivm->module);
 
si_llvm_finalize_module(,
r600_extra_shader_checks(>b, PIPE_SHADER_GEOMETRY));
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/23] radeonsi: only write values belonging to the stream when emitting GS vertex

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/drivers/radeonsi/si_shader.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0eeff19..b861831 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5177,20 +5177,23 @@ static void si_llvm_emit_vertex(
   ctx->voidt, , 1, 0);
} else {
lp_build_if(_state, gallivm, can_emit);
}
 
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr =
ctx->soa.outputs[i];
 
for (chan = 0; chan < 4; chan++) {
+   if (((info->output_streams[i] >> (2 * chan)) & 3) != 
stream)
+   continue;
+
LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, 
out_ptr[chan], "");
LLVMValueRef voffset =
lp_build_const_int32(gallivm, (i * 4 + chan) *
 
shader->selector->gs_max_out_vertices);
 
voffset = lp_build_add(uint, voffset, gs_next_vertex);
voffset = lp_build_mul_imm(uint, voffset, 4);
 
out_val = LLVMBuildBitCast(gallivm->builder, out_val, 
ctx->i32, "");
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/23] tgsi: add Stream{X, Y, Z, W} fields to tgsi_declaration_semantic

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

This is for geometry shader outputs. Without it, drivers have no way of
knowing which stream each output is intended for, and have to
conservatively write all outputs to all streams.

Separate stream numbers for each component are required due to output
packing.
---
 src/compiler/glsl/ir_print_visitor.cpp |  4 +--
 src/gallium/auxiliary/tgsi/tgsi_build.c| 18 +--
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 13 
 src/gallium/auxiliary/tgsi/tgsi_text.c | 48 ++
 src/gallium/include/pipe/p_shader_tokens.h |  5 +++-
 5 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/src/compiler/glsl/ir_print_visitor.cpp 
b/src/compiler/glsl/ir_print_visitor.cpp
index 2b77c14..d401426 100644
--- a/src/compiler/glsl/ir_print_visitor.cpp
+++ b/src/compiler/glsl/ir_print_visitor.cpp
@@ -173,26 +173,26 @@ void ir_print_visitor::visit(ir_variable *ir)
if (ir->data.location != -1)
   snprintf(loc, sizeof(loc), "location=%i ", ir->data.location);
 
char component[32] = {0};
if (ir->data.explicit_component)
   snprintf(component, sizeof(component), "component=%i ", 
ir->data.location_frac);
 
char stream[32] = {0};
if (ir->data.stream & (1u << 31)) {
   if (ir->data.stream & ~(1u << 31)) {
- snprintf(stream, sizeof(stream), "stream(%u,%u,%u,%u)",
+ snprintf(stream, sizeof(stream), "stream(%u,%u,%u,%u) ",
   ir->data.stream & 3, (ir->data.stream >> 2) & 3,
   (ir->data.stream >> 4) & 3, (ir->data.stream >> 6) & 3);
   }
} else if (ir->data.stream) {
-  snprintf(stream, sizeof(stream), "stream%u", ir->data.stream);
+  snprintf(stream, sizeof(stream), "stream%u ", ir->data.stream);
}
 
const char *const cent = (ir->data.centroid) ? "centroid " : "";
const char *const samp = (ir->data.sample) ? "sample " : "";
const char *const patc = (ir->data.patch) ? "patch " : "";
const char *const inv = (ir->data.invariant) ? "invariant " : "";
const char *const prec = (ir->data.precise) ? "precise " : "";
const char *const mode[] = { "", "uniform ", "shader_storage ",
 "shader_shared ", "shader_in ", "shader_out ",
 "in ", "out ", "inout ",
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c 
b/src/gallium/auxiliary/tgsi/tgsi_build.c
index d525c8f..773f892 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -232,40 +232,50 @@ tgsi_build_declaration_interp(unsigned interpolate,
return di;
 }
 
 static struct tgsi_declaration_semantic
 tgsi_default_declaration_semantic( void )
 {
struct tgsi_declaration_semantic ds;
 
ds.Name = TGSI_SEMANTIC_POSITION;
ds.Index = 0;
-   ds.Padding = 0;
+   ds.StreamX = 0;
+   ds.StreamY = 0;
+   ds.StreamZ = 0;
+   ds.StreamW = 0;
 
return ds;
 }
 
 static struct tgsi_declaration_semantic
 tgsi_build_declaration_semantic(
unsigned semantic_name,
unsigned semantic_index,
+   unsigned streamx,
+   unsigned streamy,
+   unsigned streamz,
+   unsigned streamw,
struct tgsi_declaration *declaration,
struct tgsi_header *header )
 {
struct tgsi_declaration_semantic ds;
 
assert( semantic_name <= TGSI_SEMANTIC_COUNT );
assert( semantic_index <= 0x );
 
ds.Name = semantic_name;
ds.Index = semantic_index;
-   ds.Padding = 0;
+   ds.StreamX = streamx;
+   ds.StreamY = streamy;
+   ds.StreamZ = streamz;
+   ds.StreamW = streamw;
 
declaration_grow( declaration, header );
 
return ds;
 }
 
 static struct tgsi_declaration_image
 tgsi_default_declaration_image(void)
 {
struct tgsi_declaration_image di;
@@ -454,20 +464,24 @@ tgsi_build_full_declaration(
   struct tgsi_declaration_semantic *ds;
 
   if( maxsize <= size )
  return  0;
   ds = (struct tgsi_declaration_semantic *) &tokens[size];
   size++;
 
   *ds = tgsi_build_declaration_semantic(
  full_decl->Semantic.Name,
  full_decl->Semantic.Index,
+ full_decl->Semantic.StreamX,
+ full_decl->Semantic.StreamY,
+ full_decl->Semantic.StreamZ,
+ full_decl->Semantic.StreamW,
  declaration,
  header );
}
 
if (full_decl->Declaration.File == TGSI_FILE_IMAGE) {
   struct tgsi_declaration_image *di;
 
   if (maxsize <= size) {
  return  0;
   }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 614bcb2..f74aad1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -353,20 +353,33 @@ iter_declaration(
if (decl->Declaration.Semantic) {
   TXT( ", " );
   ENM( decl->Semantic.Name, tgsi_semantic_names );
   if (decl->Semantic.Index != 0 ||
   decl->Semantic.Name == TGSI_SEMANTIC_TEXCOORD ||
   decl->Semantic.Name == TGSI_SEMANTIC_GENERIC) 

[Mesa-dev] [PATCH 08/23] radeonsi: rename members of si_shader_output_values

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Be a bit more verbose and avoid confusion in future patches.
---
 src/gallium/drivers/radeonsi/si_shader.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index aac3091..c488f91 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -48,22 +48,22 @@
 
 static const char *scratch_rsrc_dword0_symbol =
"SCRATCH_RSRC_DWORD0";
 
 static const char *scratch_rsrc_dword1_symbol =
"SCRATCH_RSRC_DWORD1";
 
 struct si_shader_output_values
 {
LLVMValueRef values[4];
-   unsigned name;
-   unsigned sid;
+   unsigned semantic_name;
+   unsigned semantic_index;
 };
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
   struct si_screen *sscreen,
   struct si_shader *shader,
   LLVMTargetMachineRef tm);
 
 static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
 struct lp_build_tgsi_context *bld_base,
 struct lp_build_emit_data *emit_data);
@@ -2272,22 +2272,22 @@ static void si_llvm_export_vs(struct 
lp_build_tgsi_context *bld_base,
unsigned target;
unsigned param_count = 0;
unsigned pos_idx;
int i;
 
if (outputs && ctx->shader->selector->so.num_outputs) {
si_llvm_emit_streamout(ctx, outputs, noutput);
}
 
for (i = 0; i < noutput; i++) {
-   semantic_name = outputs[i].name;
-   semantic_index = outputs[i].sid;
+   semantic_name = outputs[i].semantic_name;
+   semantic_index = outputs[i].semantic_index;
bool export_param = true;
 
switch (semantic_name) {
case TGSI_SEMANTIC_POSITION: /* ignore these */
case TGSI_SEMANTIC_PSIZE:
case TGSI_SEMANTIC_CLIPVERTEX:
case TGSI_SEMANTIC_EDGEFLAG:
break;
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_CLIPDIST:
@@ -2786,22 +2786,22 @@ static void si_llvm_emit_vs_epilogue(struct 
lp_build_tgsi_context *bld_base)
val = si_llvm_saturate(bld_base, val);
LLVMBuildStore(gallivm->builder, val, addr);
}
}
 
if (cond)
lp_build_endif(&if_ctx);
}
 
for (i = 0; i < info->num_outputs; i++) {
-   outputs[i].name = info->output_semantic_name[i];
-   outputs[i].sid = info->output_semantic_index[i];
+   outputs[i].semantic_name = info->output_semantic_name[i];
+   outputs[i].semantic_index = info->output_semantic_index[i];
 
for (j = 0; j < 4; j++)
outputs[i].values[j] =
LLVMBuildLoad(gallivm->builder,
  ctx->soa.outputs[i][j],
  "");
}
 
/* Return the primitive ID from the LLVM function. */
ctx->return_value =
@@ -6223,22 +6223,22 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
args[4] = uint->one;  /* OFFEN */
args[5] = uint->zero; /* IDXEN */
args[6] = uint->one;  /* GLC */
args[7] = uint->one;  /* SLC */
args[8] = uint->zero; /* TFE */
 
/* Fetch vertex data from GSVS ring */
for (i = 0; i < gsinfo->num_outputs; ++i) {
unsigned chan;
 
-   outputs[i].name = gsinfo->output_semantic_name[i];
-   outputs[i].sid = gsinfo->output_semantic_index[i];
+   outputs[i].semantic_name = gsinfo->output_semantic_name[i];
+   outputs[i].semantic_index = gsinfo->output_semantic_index[i];
 
for (chan = 0; chan < 4; chan++) {
args[2] = lp_build_const_int32(gallivm,
   (i * 4 + chan) *
   
gs_selector->gs_max_out_vertices * 16 * 4);
 
outputs[i].values[chan] =
LLVMBuildBitCast(gallivm->builder,
 
lp_build_intrinsic(gallivm->builder,
 
"llvm.SI.buffer.load.dword.i32.i32",
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/23] radeonsi: load the streamout buf descriptors closer to their use

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

LLVM can still decide to hoist the loads since they're marked invariant.
---
 src/gallium/drivers/radeonsi/si_shader.c | 25 +++--
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 1cda59c..d26c36a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2178,33 +2178,20 @@ static void emit_streamout_output(struct 
si_shader_context *ctx,
  * to buffers. */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
   struct si_shader_output_values *outputs,
   unsigned noutput)
 {
struct pipe_stream_output_info *so = &ctx->shader->selector->so;
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = gallivm->builder;
int i;
struct lp_build_if_state if_ctx;
-   LLVMValueRef so_buffers[4];
-   LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
-   SI_PARAM_RW_BUFFERS);
-
-   /* Load the descriptors. */
-   for (i = 0; i < 4; ++i) {
-   if (ctx->shader->selector->so.stride[i]) {
-   LLVMValueRef offset = lp_build_const_int32(gallivm,
-  
SI_VS_STREAMOUT_BUF0 + i);
-
-   so_buffers[i] = build_indexed_load_const(ctx, buf_ptr, 
offset);
-   }
-   }
 
/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
LLVMValueRef so_vtx_count =
unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 
LLVMValueRef tid = get_thread_id(ctx);
 
/* can_emit = tid < so_vtx_count; */
LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
@@ -2223,26 +2210,36 @@ static void si_llvm_emit_streamout(struct 
si_shader_context *ctx,
 *attrib_offset
  */
 
LLVMValueRef so_write_index =
LLVMGetParam(ctx->main_fn,
 ctx->param_streamout_write_index);
 
/* Compute (streamout_write_index + thread_id). */
so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");
 
-   /* Compute the write offset for each enabled buffer. */
+   /* Load the descriptor and compute the write offset for each
+* enabled buffer. */
LLVMValueRef so_write_offset[4] = {};
+   LLVMValueRef so_buffers[4];
+   LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
+   SI_PARAM_RW_BUFFERS);
+
for (i = 0; i < 4; i++) {
if (!so->stride[i])
continue;
 
+   LLVMValueRef offset = lp_build_const_int32(gallivm,
+  
SI_VS_STREAMOUT_BUF0 + i);
+
+   so_buffers[i] = build_indexed_load_const(ctx, buf_ptr, 
offset);
+
LLVMValueRef so_offset = LLVMGetParam(ctx->main_fn,
  
ctx->param_streamout_offset[i]);
so_offset = LLVMBuildMul(builder, so_offset, 
LLVMConstInt(ctx->i32, 4, 0), "");
 
so_write_offset[i] = LLVMBuildMul(builder, 
so_write_index,
  
LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
so_write_offset[i] = LLVMBuildAdd(builder, 
so_write_offset[i], so_offset, "");
}
 
/* Write streamout data. */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/23] radeonsi: update all GSVS ring descriptors for new buffer allocations

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Fixes 
GL45-CTS.gtf40.GL3Tests.transform_feedback3.transform_feedback3_geometry_instanced.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 0afc3b4..ea71569 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2031,24 +2031,29 @@ static bool si_update_gs_ring_buffers(struct si_context 
*sctx)
 
/* Set ring bindings. */
if (sctx->esgs_ring) {
si_set_ring_buffer(&sctx->b.b, SI_ES_RING_ESGS,
   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
   true, true, 4, 64, 0);
si_set_ring_buffer(&sctx->b.b, SI_GS_RING_ESGS,
   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
   false, false, 0, 0, 0);
}
-   if (sctx->gsvs_ring)
+   if (sctx->gsvs_ring) {
si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS,
   sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
   false, false, 0, 0, 0);
+
+   /* Also update SI_GS_RING_GSVSi descriptors. */
+   sctx->last_gsvs_itemsize = 0;
+   }
+
return true;
 }
 
 static void si_update_gsvs_ring_bindings(struct si_context *sctx)
 {
unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size;
uint64_t offset;
 
if (!sctx->gsvs_ring || gsvs_itemsize == sctx->last_gsvs_itemsize)
return;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/23] tgsi/scan: collect information about output vertex streams

2016-11-30 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 9 +
 src/gallium/auxiliary/tgsi/tgsi_scan.h | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 84d6456..cd95033 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -538,21 +538,30 @@ scan_declaration(struct tgsi_shader_info *info,
 break;
  case TGSI_SEMANTIC_SAMPLEMASK:
 info->reads_samplemask = TRUE;
 break;
  }
  break;
 
   case TGSI_FILE_OUTPUT:
  info->output_semantic_name[reg] = (ubyte) semName;
  info->output_semantic_index[reg] = (ubyte) semIndex;
+ info->output_streams[reg] =
+(ubyte)fulldecl->Semantic.StreamX |
+((ubyte)fulldecl->Semantic.StreamY << 2) |
+((ubyte)fulldecl->Semantic.StreamZ << 4) |
+((ubyte)fulldecl->Semantic.StreamW << 6);
  info->num_outputs = MAX2(info->num_outputs, reg + 1);
+ info->num_stream_output_components[fulldecl->Semantic.StreamX]++;
+ info->num_stream_output_components[fulldecl->Semantic.StreamY]++;
+ info->num_stream_output_components[fulldecl->Semantic.StreamZ]++;
+ info->num_stream_output_components[fulldecl->Semantic.StreamW]++;
 
  switch (semName) {
  case TGSI_SEMANTIC_VIEWPORT_INDEX:
 info->writes_viewport_index = true;
 break;
  case TGSI_SEMANTIC_LAYER:
 info->writes_layer = true;
 break;
  case TGSI_SEMANTIC_PSIZE:
 info->writes_psize = true;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h 
b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index fe503e8..a6c8267 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -47,34 +47,36 @@ struct tgsi_shader_info
ubyte num_inputs;
ubyte num_outputs;
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
ubyte input_interpolate_loc[PIPE_MAX_SHADER_INPUTS];
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
+   ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];
 
ubyte num_system_values;
ubyte system_value_semantic_name[PIPE_MAX_SHADER_INPUTS];
 
ubyte processor;
 
uint file_mask[TGSI_FILE_COUNT];  /**< bitmask of declared registers */
uint file_count[TGSI_FILE_COUNT];  /**< number of declared registers */
int file_max[TGSI_FILE_COUNT];  /**< highest index of declared registers */
int const_file_max[PIPE_MAX_CONSTANT_BUFFERS];
unsigned const_buffers_declared; /**< bitmask of declared const buffers */
unsigned samplers_declared; /**< bitmask of declared samplers */
ubyte sampler_targets[PIPE_MAX_SHADER_SAMPLER_VIEWS];  /**< TGSI_TEXTURE_x 
values */
ubyte sampler_type[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /**< TGSI_RETURN_TYPE_x 
*/
+   ubyte num_stream_output_components[4];
 
ubyte input_array_first[PIPE_MAX_SHADER_INPUTS];
ubyte input_array_last[PIPE_MAX_SHADER_INPUTS];
ubyte output_array_first[PIPE_MAX_SHADER_OUTPUTS];
ubyte output_array_last[PIPE_MAX_SHADER_OUTPUTS];
unsigned array_max[TGSI_FILE_COUNT];  /**< highest index array per register 
file */
 
uint immediate_count; /**< number of immediates declared */
uint num_instructions;
uint num_memory_instructions; /**< sampler, buffer, and image instructions 
*/
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >