Re: [Mesa-dev] [PATCH 00/14] radeonsi: Offchip tessellation

2016-05-19 Thread Bas Nieuwenhuizen
> Bas,
>
> do we see this with Mesa 11.3 / 12.0?
> Should read did you have an updated version ready for release?
>
> Thanks,
>   Dieter

Hi Dieter,

There is a v2 on the list on which there are still some comments I
need to resolve. However, I have been and am away from my dev machine
this week. I am not sure what the current feature freeze deadline is,
but if it still is this weekend, the fixed series will probably come
too late for that.

Yours sincerely.
Bas Nieuwenhuizen
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: drop assert for new arrays code.

2016-05-19 Thread Jason Ekstrand
On Thu, May 19, 2016 at 8:48 PM, Dave Airlie  wrote:

> From: Dave Airlie 
>
> This code handles 0 length fine,


Um... No it doesn't.  A length of 0 means unsized which means we really
need to do a get_array_length call and loop.  Am I missing something?


> and with the new glsl layer
> code to handle unsized array better, we can hit this path with
> ./bin/arb_separate_shader_object-GetProgramPipelineiv
>
> removing the assert works fine.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/compiler/nir/nir_lower_var_copies.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/src/compiler/nir/nir_lower_var_copies.c
> b/src/compiler/nir/nir_lower_var_copies.c
> index b7e9989..f774f53 100644
> --- a/src/compiler/nir/nir_lower_var_copies.c
> +++ b/src/compiler/nir/nir_lower_var_copies.c
> @@ -93,7 +93,6 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
>unsigned length = glsl_get_length(src_arr_parent->type);
>/* The wildcards should represent the same number of elements */
>assert(length == glsl_get_length(dest_arr_parent->type));
> -  assert(length > 0);
>
>/* Walk over all of the elements that this wildcard refers to and
> * call emit_copy_load_store on each one of them */
> --
> 2.5.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] i965/blorp: Use core vertex buffer state setup

2016-05-19 Thread Topi Pohjolainen
Also split the setup from the setup of vertex elements.

Signed-off-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/gen6_blorp.c | 102 -
 1 file changed, 36 insertions(+), 66 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.c 
b/src/mesa/drivers/dri/i965/gen6_blorp.c
index 5f84ab0..4d73d04 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.c
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.c
@@ -30,56 +30,12 @@
 #include "brw_state.h"
 
 #include "brw_blorp.h"
+#include "vbo/vbo.h"
+#include "brw_draw.h"
 
 static void
-gen6_blorp_emit_vertex_buffer_state(struct brw_context *brw,
-unsigned num_elems,
-unsigned vbo_size,
-uint32_t vertex_offset)
-{
-   /* 3DSTATE_VERTEX_BUFFERS */
-   const int num_buffers = 1;
-   const int batch_length = 1 + 4 * num_buffers;
-
-   uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
-  (num_elems * sizeof(float)) << BRW_VB0_PITCH_SHIFT;
-
-   if (brw->gen >= 7)
-  dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
-
-   switch (brw->gen) {
-   case 7:
-  dw0 |= GEN7_MOCS_L3 << 16;
-  break;
-   case 8:
-  dw0 |= BDW_MOCS_WB << 16;
-  break;
-   case 9:
-  dw0 |= SKL_MOCS_WB << 16;
-  break;
-   }
-
-   BEGIN_BATCH(batch_length);
-   OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
-   OUT_BATCH(dw0);
-   if (brw->gen >= 8) {
-  OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, vertex_offset);
-  OUT_BATCH(vbo_size);
-   } else {
-  /* start address */
-  OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
-vertex_offset);
-  /* end address */
-  OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
-vertex_offset + vbo_size - 1);
-  OUT_BATCH(0);
-   }
-   ADVANCE_BATCH();
-}
-
-void
-gen6_blorp_emit_vertices(struct brw_context *brw,
- const struct brw_blorp_params *params)
+gen6_blorp_emit_vertex_data(struct brw_context *brw,
+const struct brw_blorp_params *params)
 {
uint32_t vertex_offset;
 
@@ -119,24 +75,38 @@ gen6_blorp_emit_vertices(struct brw_context *brw,
 * instead of reading them from the buffer. See the vertex element setup
 * below.
 */
-   {
-  float *vertex_data;
-
-  const float vertices[] = {
- /* v0 */ (float)params->x0, (float)params->y1,
- /* v1 */ (float)params->x1, (float)params->y1,
- /* v2 */ (float)params->x0, (float)params->y0,
-  };
-
-  vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
-  sizeof(vertices), 32,
-  _offset);
-  memcpy(vertex_data, vertices, sizeof(vertices));
-
-  const unsigned blorp_num_vue_elems = 2;
-  gen6_blorp_emit_vertex_buffer_state(brw, blorp_num_vue_elems,
-  sizeof(vertices), vertex_offset);
-   }
+   const float vertices[] = {
+  /* v0 */ (float)params->x0, (float)params->y1,
+  /* v1 */ (float)params->x1, (float)params->y1,
+  /* v2 */ (float)params->x0, (float)params->y0,
+   };
+
+   float *const vertex_data = (float *)brw_state_batch(
+  brw, AUB_TRACE_VERTEX_BUFFER,
+  sizeof(vertices), 32,
+  _offset);
+   memcpy(vertex_data, vertices, sizeof(vertices));
+
+   /* 3DSTATE_VERTEX_BUFFERS */
+   const int num_buffers = 1;
+   const int batch_length = 1 + 4 * num_buffers;
+
+   BEGIN_BATCH(batch_length);
+   OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
+
+   const unsigned blorp_num_vue_elems = 2;
+   const unsigned stride = blorp_num_vue_elems * sizeof(float);
+   EMIT_VERTEX_BUFFER_STATE(brw, 0 /* buffer_nr */, brw->batch.bo,
+vertex_offset, vertex_offset + sizeof(vertices),
+stride, 0 /* steprate */);
+   ADVANCE_BATCH();
+}
+
+void
+gen6_blorp_emit_vertices(struct brw_context *brw,
+ const struct brw_blorp_params *params)
+{
+   gen6_blorp_emit_vertex_data(brw, params);
 
/* 3DSTATE_VERTEX_ELEMENTS
 *
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir: drop assert for new arrays code.

2016-05-19 Thread Dave Airlie
From: Dave Airlie 

This code handles 0 length fine, and with the new glsl layer
code to handle unsized array better, we can hit this path with
./bin/arb_separate_shader_object-GetProgramPipelineiv

removing the assert works fine.

Signed-off-by: Dave Airlie 
---
 src/compiler/nir/nir_lower_var_copies.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/compiler/nir/nir_lower_var_copies.c 
b/src/compiler/nir/nir_lower_var_copies.c
index b7e9989..f774f53 100644
--- a/src/compiler/nir/nir_lower_var_copies.c
+++ b/src/compiler/nir/nir_lower_var_copies.c
@@ -93,7 +93,6 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
   unsigned length = glsl_get_length(src_arr_parent->type);
   /* The wildcards should represent the same number of elements */
   assert(length == glsl_get_length(dest_arr_parent->type));
-  assert(length > 0);
 
   /* Walk over all of the elements that this wildcard refers to and
* call emit_copy_load_store on each one of them */
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] st/mesa: reenable culling

2016-05-19 Thread Dave Airlie
From: Dave Airlie 

Now the lowering pass is fixed, reenable ARB_cull_distance.

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_extensions.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index ea60e41..4b9a3bd 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -574,7 +574,7 @@ void st_init_extensions(struct pipe_screen *screen,
   { o(ARB_color_buffer_float),   PIPE_CAP_VERTEX_COLOR_UNCLAMPED   
},
   { o(ARB_conditional_render_inverted),  
PIPE_CAP_CONDITIONAL_RENDER_INVERTED  },
   { o(ARB_copy_image),   
PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS },
-  //{ o(ARB_cull_distance),PIPE_CAP_CULL_DISTANCE  
  },
+  { o(ARB_cull_distance),PIPE_CAP_CULL_DISTANCE
},
   { o(ARB_depth_clamp),  PIPE_CAP_DEPTH_CLIP_DISABLE   
},
   { o(ARB_depth_texture),PIPE_CAP_TEXTURE_SHADOW_MAP   
},
   { o(ARB_derivative_control),   PIPE_CAP_TGSI_FS_FINE_DERIVATIVE  
},
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] i965/gen8: Fix the vertex buffer size

2016-05-19 Thread Topi Pohjolainen
And refactor to use the same upload logic with earlier gens. On
gen >= 8 one doesn't provide ending address but number of bytes
available. This is relative to the given offset. Until now we
programmed the full size of the buffer regardless of the used
offset.

Signed-off-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c  | 34 +++
 src/mesa/drivers/dri/i965/gen8_draw_upload.c | 40 +++-
 2 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 2164c95..e608476 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -647,7 +647,9 @@ brw_emit_vertex_buffer_state(struct brw_context *brw,
struct gl_context *ctx = >ctx;
uint32_t dw0;
 
-   if (brw->gen >= 6) {
+   if (brw->gen >= 8) {
+  dw0 = buffer_nr << GEN6_VB0_INDEX_SHIFT;
+   } else if (brw->gen >= 6) {
   dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) |
 (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA
: GEN6_VB0_ACCESS_VERTEXDATA);
@@ -660,15 +662,35 @@ brw_emit_vertex_buffer_state(struct brw_context *brw,
if (brw->gen >= 7)
   dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
 
-   if (brw->gen == 7)
+   switch (brw->gen) {
+   case 7:
   dw0 |= GEN7_MOCS_L3 << 16;
+  break;
+   case 8:
+  dw0 |= BDW_MOCS_WB << 16;
+  break;
+   case 9:
+  dw0 |= SKL_MOCS_WB << 16;
+  break;
+   }
 
WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047),
  "VBO stride %d too large, bad rendering may occur\n",
  stride);
OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));
-   OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
-   if (brw->gen >= 5) {
+   if (brw->gen >= 8) {
+  OUT_RELOC64(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
+  /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry -
+   * Vertex Fetch (VF) Stage - State
+   *
+   * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x
+   * VBState.BufferPitch", the address of the byte immediately beyond the
+   * last valid byte of the buffer is determined by
+   * "VBState.StartingBufferAddress + VBState.BufferSize".
+   */
+  OUT_BATCH(end_offset - start_offset);
+   } else if (brw->gen >= 5) {
+  OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
   /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry -
* Vertex Fetch (VF) Stage - State
*
@@ -678,10 +700,12 @@ brw_emit_vertex_buffer_state(struct brw_context *brw,
*  "VBState.EndAddress + 1".
*/
   OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, end_offset - 1);
+  OUT_BATCH(step_rate);
} else {
+  OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
   OUT_BATCH(0);
+  OUT_BATCH(step_rate);
}
-   OUT_BATCH(step_rate);
 
return __map;
 }
diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c 
b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
index dce11dd..4e0c526 100644
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -52,7 +52,6 @@ static void
 gen8_emit_vertices(struct brw_context *brw)
 {
struct gl_context *ctx = >ctx;
-   uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
bool uses_edge_flag;
 
brw_prepare_vertices(brw);
@@ -141,35 +140,28 @@ gen8_emit_vertices(struct brw_context *brw)
   BEGIN_BATCH(1 + 4 * nr_buffers);
   OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
   for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
- struct brw_vertex_buffer *buffer = >vb.buffers[i];
- uint32_t dw0 = 0;
-
- dw0 |= i << GEN6_VB0_INDEX_SHIFT;
- dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
- dw0 |= buffer->stride << BRW_VB0_PITCH_SHIFT;
- dw0 |= mocs_wb << 16;
-
- OUT_BATCH(dw0);
- OUT_RELOC64(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
- OUT_BATCH(buffer->bo->size);
+ const struct brw_vertex_buffer *buffer = >vb.buffers[i];
+ EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->offset,
+  buffer->bo->size, buffer->stride,
+  0 /* unused */);
   }
 
   if (uses_draw_params) {
- OUT_BATCH(brw->vb.nr_buffers << GEN6_VB0_INDEX_SHIFT |
-   GEN7_VB0_ADDRESS_MODIFYENABLE |
-   mocs_wb << 16);
- OUT_RELOC64(brw->draw.draw_params_bo, I915_GEM_DOMAIN_VERTEX, 0,
- brw->draw.draw_params_offset);
- OUT_BATCH(brw->draw.draw_params_bo->size);
+ EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
+  brw->draw.draw_params_bo,
+  brw->draw.draw_params_offset,
+  

[Mesa-dev] [PATCH 1/4] glsl: make max array trackers ints and use -1 as base.

2016-05-19 Thread Dave Airlie
From: Dave Airlie 

This fixes a bug that breaks cull distances. The problem
is the max array accessors can't tell the difference between
a never-accessed unsized array and an unsized array accessed
only at location 0. This leads to converting an undeclared unused
gl_ClipDistance inside or outside gl_PerVertex to a size 1
array. However, we need the number of active clip distances
to work out the starting point for the cull distances, and
this offset by one when it's not being used isn't possible
to distinguish from the case where only the first element is
accessed. I tried to use ->used for this, but that doesn't
work when gl_ClipDistance is part of an interface block.

So this changes things so that max_array_access is an int
and initialised to -1. This also allows unsized arrays to
proceed further than they could before, but we really shouldn't
mind as they will get eliminated if nothing uses them later.

For initialised uniforms we no longer change their array
size at runtime, if these are unused they will get eliminated
eventually.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ast_array_index.cpp   |  4 ++--
 src/compiler/glsl/ast_to_hir.cpp|  8 
 src/compiler/glsl/ir.cpp|  2 +-
 src/compiler/glsl/ir.h  | 15 +--
 src/compiler/glsl/ir_clone.cpp  |  2 +-
 src/compiler/glsl/ir_validate.cpp   |  6 +++---
 src/compiler/glsl/link_functions.cpp|  4 ++--
 src/compiler/glsl/link_interface_blocks.cpp |  6 --
 src/compiler/glsl/linker.cpp| 14 +++---
 src/mesa/main/ff_fragment_shader.cpp|  6 +++---
 10 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/src/compiler/glsl/ast_array_index.cpp 
b/src/compiler/glsl/ast_array_index.cpp
index 69322cf..2e36035 100644
--- a/src/compiler/glsl/ast_array_index.cpp
+++ b/src/compiler/glsl/ast_array_index.cpp
@@ -92,12 +92,12 @@ update_max_array_access(ir_rvalue *ir, int idx, YYLTYPE 
*loc,
deref_record->record->type->field_index(deref_record->field);
 assert(field_index < deref_var->var->get_interface_type()->length);
 
-unsigned *const max_ifc_array_access =
+int *const max_ifc_array_access =
deref_var->var->get_max_ifc_array_access();
 
 assert(max_ifc_array_access != NULL);
 
-if (idx > (int)max_ifc_array_access[field_index]) {
+if (idx > max_ifc_array_access[field_index]) {
max_ifc_array_access[field_index] = idx;
 
/* Check whether this access will, as a side effect, implicitly
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index ecfe684..1455bdf 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -976,7 +976,7 @@ do_assignment(exec_list *instructions, struct 
_mesa_glsl_parse_state *state,
 
  assert(var != NULL);
 
- if (var->data.max_array_access >= unsigned(rhs->type->array_size())) {
+ if (var->data.max_array_access >= rhs->type->array_size()) {
 /* FINISHME: This should actually log the location of the RHS. */
 _mesa_glsl_error(& lhs_loc, state, "array size must be > %u due to 
"
  "previous access",
@@ -3858,7 +3858,7 @@ get_variable_being_redeclared(ir_variable *var, YYLTYPE 
loc,
* FINISHME: required or not.
*/
 
-  const unsigned size = unsigned(var->type->array_size());
+  const int size = var->type->array_size();
   check_builtin_array_max_size(var->name, size, loc, state);
   if ((size > 0) && (size <= earlier->data.max_array_access)) {
  _mesa_glsl_error(& loc, state, "array size must be > %u due to "
@@ -7711,7 +7711,7 @@ ast_tcs_output_layout::hir(exec_list *instructions,
   if (!var->type->is_unsized_array() || var->data.patch)
  continue;
 
-  if (var->data.max_array_access >= num_vertices) {
+  if (var->data.max_array_access >= (int)num_vertices) {
 _mesa_glsl_error(, state,
  "this tessellation control shader output layout "
  "specifies %u vertices, but an access to element "
@@ -7772,7 +7772,7 @@ ast_gs_input_layout::hir(exec_list *instructions,
*/
 
   if (var->type->is_unsized_array()) {
- if (var->data.max_array_access >= num_vertices) {
+ if (var->data.max_array_access >= (int)num_vertices) {
 _mesa_glsl_error(, state,
  "this geometry shader input layout implies %u"
  " vertices, but an access to element %u of input"
diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp
index 9637d7a..5bb3ac3 100644
--- a/src/compiler/glsl/ir.cpp
+++ b/src/compiler/glsl/ir.cpp
@@ -1668,7 +1668,7 @@ ir_variable::ir_variable(const struct glsl_type *type, 
const char *name,
   

[Mesa-dev] [PATCH 1/3] i965/draw: Expose vertex buffer state setup

2016-05-19 Thread Topi Pohjolainen
Also change the interface to use start and end offsets.

Signed-off-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/brw_draw.h| 13 ++
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 39 +
 2 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.h 
b/src/mesa/drivers/dri/i965/brw_draw.h
index 23d98ef..64ad9b5 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.h
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -27,9 +27,22 @@
 #define BRW_DRAW_H
 
 #include "main/mtypes.h"   /* for struct gl_context... */
+#include "intel_bufmgr.h"
 
 struct brw_context;
 
+uint32_t *
+brw_emit_vertex_buffer_state(struct brw_context *brw,
+ unsigned buffer_nr,
+ drm_intel_bo *bo,
+ unsigned start_offset,
+ unsigned end_offset,
+ unsigned stride,
+ unsigned step_rate,
+ uint32_t *__map);
+
+#define EMIT_VERTEX_BUFFER_STATE(...) __map = \
+   brw_emit_vertex_buffer_state(__VA_ARGS__, __map)
 
 void brw_draw_prims(struct gl_context *ctx,
 const struct _mesa_prim *prims,
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 5af4583..2164c95 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -634,15 +634,15 @@ brw_prepare_shader_draw_parameters(struct brw_context 
*brw)
 /**
  * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS).
  */
-static uint32_t *
-emit_vertex_buffer_state(struct brw_context *brw,
- unsigned buffer_nr,
- drm_intel_bo *bo,
- unsigned bo_ending_address,
- unsigned bo_offset,
- unsigned stride,
- unsigned step_rate,
- uint32_t *__map)
+uint32_t *
+brw_emit_vertex_buffer_state(struct brw_context *brw,
+ unsigned buffer_nr,
+ drm_intel_bo *bo,
+ unsigned start_offset,
+ unsigned end_offset,
+ unsigned stride,
+ unsigned step_rate,
+ uint32_t *__map)
 {
struct gl_context *ctx = >ctx;
uint32_t dw0;
@@ -667,9 +667,17 @@ emit_vertex_buffer_state(struct brw_context *brw,
  "VBO stride %d too large, bad rendering may occur\n",
  stride);
OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));
-   OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_offset);
+   OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
if (brw->gen >= 5) {
-  OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_ending_address);
+  /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry -
+   * Vertex Fetch (VF) Stage - State
+   *
+   *  Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x
+   *  VBState.BufferPitch", the address of the byte immediately beyond the
+   *  last valid byte of the buffer is determined by
+   *  "VBState.EndAddress + 1".
+   */
+  OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, end_offset - 1);
} else {
   OUT_BATCH(0);
}
@@ -677,7 +685,6 @@ emit_vertex_buffer_state(struct brw_context *brw,
 
return __map;
 }
-#define EMIT_VERTEX_BUFFER_STATE(...) __map = 
emit_vertex_buffer_state(__VA_ARGS__, __map)
 
 static void
 brw_emit_vertices(struct brw_context *brw)
@@ -745,8 +752,8 @@ brw_emit_vertices(struct brw_context *brw)
   OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
   for (i = 0; i < brw->vb.nr_buffers; i++) {
 struct brw_vertex_buffer *buffer = >vb.buffers[i];
- EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->bo->size - 1,
-  buffer->offset, buffer->stride,
+ EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->offset,
+  buffer->bo->size, buffer->stride,
   buffer->step_rate);
 
   }
@@ -754,8 +761,8 @@ brw_emit_vertices(struct brw_context *brw)
   if (uses_draw_params) {
  EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
   brw->draw.draw_params_bo,
-  brw->draw.draw_params_bo->size - 1,
   brw->draw.draw_params_offset,
+  brw->draw.draw_params_bo->size,
   0,  /* stride */
   0); /* step rate */
   }
@@ -763,8 +770,8 @@ brw_emit_vertices(struct brw_context *brw)
   if (brw->vs.prog_data->uses_drawid) {
  

[Mesa-dev] [PATCH 2/4] glsl: rewrite clip/cull distance lowering pass

2016-05-19 Thread Dave Airlie
From: Dave Airlie 

The last version of this broke clipping, and I had to spend
some time getting this working properly.

I had to introduce a third pass to count the clip/cull totals,
all due to one messy corner case. We have a piglit test
tes-input-gl_ClipDistance.shader_test
that doesn't actually output the clip distances, it just passes
them like a varying from TCS->TES, the older lowering pass worked
but to lower clip/cull we need to know the total number of clip+culls
used to define the new variable correctly, and to offset culls
properly.

This adds an extra pass that works out the sizes for clip/cull,
then lowers gl_ClipDistance then gl_CullDistance into the new
gl_ClipDistanceMESA.

The pass checks using the fixed array sizes code if the array
has been referenced, or is actually never used, and ignores
it in the latter case.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ir_optimization.h  |   2 +-
 src/compiler/glsl/linker.cpp |   2 +-
 src/compiler/glsl/lower_distance.cpp | 220 ++-
 3 files changed, 164 insertions(+), 60 deletions(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 5fc2740..71b10e4 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -119,7 +119,7 @@ bool lower_variable_index_to_cond_assign(gl_shader_stage 
stage,
 bool lower_temp, bool lower_uniform);
 bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
 bool lower_const_arrays_to_uniforms(exec_list *instructions);
-bool lower_clip_distance(gl_shader *shader);
+bool lower_clip_cull_distance(struct gl_shader_program *prog, gl_shader 
*shader);
 void lower_output_reads(unsigned stage, exec_list *instructions);
 bool lower_packing_builtins(exec_list *instructions, int op_mask);
 void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index b856631..4b5b32c 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4639,7 +4639,7 @@ link_shaders(struct gl_context *ctx, struct 
gl_shader_program *prog)
 goto done;
 
   if (ctx->Const.ShaderCompilerOptions[i].LowerCombinedClipCullDistance) {
- lower_clip_distance(prog->_LinkedShaders[i]);
+ lower_clip_cull_distance(prog, prog->_LinkedShaders[i]);
   }
 
   if (ctx->Const.LowerTessLevel) {
diff --git a/src/compiler/glsl/lower_distance.cpp 
b/src/compiler/glsl/lower_distance.cpp
index 301afe4..f21a1be 100644
--- a/src/compiler/glsl/lower_distance.cpp
+++ b/src/compiler/glsl/lower_distance.cpp
@@ -45,19 +45,41 @@
  * LowerCombinedClipCullDistance flag in gl_shader_compiler_options to true.
  */
 
+#include "main/macros.h"
 #include "glsl_symbol_table.h"
 #include "ir_rvalue_visitor.h"
 #include "ir.h"
 #include "program/prog_instruction.h" /* For WRITEMASK_* */
 
+#define GLSL_CLIP_VAR_NAME "gl_ClipDistanceMESA"
+
 namespace {
 
 class lower_distance_visitor : public ir_rvalue_visitor {
 public:
-   explicit lower_distance_visitor(gl_shader_stage shader_stage)
+   explicit lower_distance_visitor(gl_shader_stage shader_stage,
+  const char *in_name, int total_size,
+  int offset)
   : progress(false), old_distance_out_var(NULL),
 old_distance_in_var(NULL), new_distance_out_var(NULL),
-new_distance_in_var(NULL), shader_stage(shader_stage)
+new_distance_in_var(NULL), shader_stage(shader_stage),
+in_name(in_name), total_size(total_size), offset(offset)
+   {
+   }
+
+   explicit lower_distance_visitor(gl_shader_stage shader_stage,
+  const char *in_name,
+  const lower_distance_visitor *orig,
+  int offset)
+  : progress(false),
+   old_distance_out_var(NULL),
+old_distance_in_var(NULL),
+   new_distance_out_var(orig->new_distance_out_var),
+new_distance_in_var(orig->new_distance_in_var),
+   shader_stage(shader_stage),
+in_name(in_name),
+   total_size(orig->total_size),
+   offset(offset)
{
}
 
@@ -100,12 +122,15 @@ public:
 * Type of shader we are compiling (e.g. MESA_SHADER_VERTEX)
 */
const gl_shader_stage shader_stage;
+   const char *in_name;
+   int total_size;
+   int offset;
 };
 
 } /* anonymous namespace */
 
 /**
- * Replace any declaration of gl_ClipDistance as an array of floats with a
+ * Replace any declaration of 'in_name' as an array of floats with a
  * declaration of gl_ClipDistanceMESA as an array of vec4's.
  */
 ir_visitor_status
@@ -114,7 +139,7 @@ lower_distance_visitor::visit(ir_variable *ir)
ir_variable **old_var;
ir_variable **new_var;
 
-   if (!ir->name || strcmp(ir->name, "gl_ClipDistance") != 0)
+   if (!ir->name || strcmp(ir->name, in_name) != 0)

[Mesa-dev] [PATCH 3/4] i965: reenable ARB_cull_distance.

2016-05-19 Thread Dave Airlie
From: Dave Airlie 

Now the lowering pass is fixed we can reenable culling.

Signed-off-by: Dave Airlie 
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 878bd84..0f0d1ce 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -291,7 +291,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_blend_func_extended =
  !driQueryOptionb(>optionCache, "disable_blend_func_extended");
   ctx->Extensions.ARB_conditional_render_inverted = true;
-  ctx->Extensions.ARB_cull_distance = false;
+  ctx->Extensions.ARB_cull_distance = true;
   ctx->Extensions.ARB_draw_buffers_blend = true;
   ctx->Extensions.ARB_ES3_compatibility = true;
   ctx->Extensions.ARB_fragment_layer_viewport = true;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] vc4: Fix failed instruction path of QIR validate pass

2016-05-19 Thread Rhys Kidd
Correct use of qir_dump_inst() within QIR validate pass.

Reported by the following GCC warning:

mesa/src/gallium/drivers/vc4/vc4_qir_validate.c: In function 'fail_instr':
mesa/src/gallium/drivers/vc4/vc4_qir_validate.c:31:23: warning: passing 
argument 1 of 'qir_dump_inst' from incompatible pointer type
 qir_dump_inst(stderr, inst);
   ^
In file included from mesa/src/gallium/drivers/vc4/vc4_qir_validate.c:24:0:
mesa/src/gallium/drivers/vc4/vc4_qir.h:489:6: note: expected 'struct 
vc4_compile *' but argument is of type 'struct _IO_FILE *'
 void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
  ^

Introduced in 8e2d0843c02daf5280184f179ae8ed440ac90d7f.

Signed-off-by: Rhys Kidd 
---
 src/gallium/drivers/vc4/vc4_qir_validate.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qir_validate.c 
b/src/gallium/drivers/vc4/vc4_qir_validate.c
index af2e3ba..da6457c 100644
--- a/src/gallium/drivers/vc4/vc4_qir_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qir_validate.c
@@ -25,10 +25,10 @@
 #include "vc4_qpu.h"
 
 static void
-fail_instr(struct qinst *inst, const char *msg)
+fail_instr(struct vc4_compile *c, struct qinst *inst, const char *msg)
 {
 fprintf(stderr, "qir_validate: %s: ", msg);
-qir_dump_inst(stderr, inst);
+qir_dump_inst(c, inst);
 fprintf(stderr, "\n");
 abort();
 }
@@ -50,18 +50,18 @@ void qir_validate(struct vc4_compile *c)
 struct qinst *def = c->defs[i];
 
 if (def && def->cond != QPU_COND_ALWAYS)
-fail_instr(def, "SSA def with condition");
+fail_instr(c, def, "SSA def with condition");
 }
 
 list_for_each_entry(struct qinst, inst, >instructions, link) {
 switch (inst->dst.file) {
 case QFILE_TEMP:
 if (inst->dst.index >= c->num_temps)
-fail_instr(inst, "bad temp index");
+fail_instr(c, inst, "bad temp index");
 
 if (c->defs[inst->dst.index] &&
 already_assigned[inst->dst.index]) {
-fail_instr(inst, "Re-assignment of SSA value");
+fail_instr(c, inst, "Re-assignment of SSA 
value");
 }
 already_assigned[inst->dst.index] = true;
 break;
@@ -81,7 +81,7 @@ void qir_validate(struct vc4_compile *c)
 case QFILE_FRAG_REV_FLAG:
 case QFILE_SMALL_IMM:
 case QFILE_LOAD_IMM:
-fail_instr(inst, "Bad dest file");
+fail_instr(c, inst, "Bad dest file");
 break;
 }
 
@@ -91,7 +91,7 @@ void qir_validate(struct vc4_compile *c)
 switch (src.file) {
 case QFILE_TEMP:
 if (src.index >= c->num_temps)
-fail_instr(inst, "bad temp index");
+fail_instr(c, inst, "bad temp index");
 break;
 
 case QFILE_VARY:
@@ -102,14 +102,14 @@ void qir_validate(struct vc4_compile *c)
 
 case QFILE_SMALL_IMM:
 if (qpu_encode_small_immediate(src.index) == 
~0)
-fail_instr(inst, "bad small 
immediate");
+fail_instr(c, inst, "bad small 
immediate");
 break;
 
 case QFILE_FRAG_X:
 case QFILE_FRAG_Y:
 case QFILE_FRAG_REV_FLAG:
 if (c->stage != QSTAGE_FRAG)
-fail_instr(inst, "frag access in 
VS/CS");
+fail_instr(c, inst, "frag access in 
VS/CS");
 break;
 
 case QFILE_NULL:
@@ -117,7 +117,7 @@ void qir_validate(struct vc4_compile *c)
 case QFILE_TLB_COLOR_WRITE_MS:
 case QFILE_TLB_Z_WRITE:
 case QFILE_TLB_STENCIL_SETUP:
-fail_instr(inst, "Bad src file");
+fail_instr(c, inst, "Bad src file");
 break;
 }
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/12] nir: Don't use ffma in nir_lower_wpos_ytransform().

2016-05-19 Thread Matt Turner
On Thu, May 19, 2016 at 6:41 PM, Kenneth Graunke  wrote:
> On Thursday, May 19, 2016 12:57:44 PM PDT Rob Clark wrote:
>> On Wed, May 18, 2016 at 6:00 PM, Kenneth Graunke 
> wrote:
>> > ffma is an explicitly fused multiply add with higher precision.
>> > The optimizer will take care of promoting mul/add to fma when
>> > it's beneficial to do so.
>> >
>> > This fixes failures on Gen4-5 when using this pass, as those platforms
>> > don't actually implement fma().
>>
>> hmm, we can't rely on the opt-algebraic pass to do this?
>>
>> BR,
>> -R
>
> We can rely on either nir_opt_algebraic (with the fuse_ffma flag set)
> or brw_nir_opt_peephole_ffma() (if someone wants to move it to
> src/compiler/nir and use it) to fuse add+mul into ffma.
>
> However, we can't rely on nir_opt_algebraic to split up ffma into
> mul+add for us.  We made it stop doing that a little while ago,
> so that the GLSL fma() built-in is always higher precision.  (The
> thinking is that if apps didn't care, they would just write (a*b+c),
> and that splitting fma() is pretty bunk...and splitting and reassembling
> so fma() has /inconsistent/ precision is even more bunk...)
>
> I suppose I could just set lower_ffma in i965's nir_compiler_options
> for Gen < 6 where we don't have a MAD instruction (and don't support
> the GLSL fma() built-in function, either).  That might be more sensible.

FWIW, my personal preference is to take this patch as is.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/14] radeonsi: Offchip tessellation

2016-05-19 Thread Dieter Nützel

Am 10.05.2016 12:52, schrieb Bas Nieuwenhuizen:
This patchset implements offchip tessellation, after which we can finally
process more than one patch per wave without decreasing tessmark scores.

For tessmark this improves performance by ~20% for the x32 case and ~80% for
the x64 case. x8 and x16 have roughly the same performance as before. Unigine
heaven gets 43 fps compared to 28 before (roughly +50%). Amdgpu-pro gets 44 fps
for heaven. For Shadow of Mordor the performance changes from 28 fps to 40 fps
(roughly +40%).


[-]

Bas,

Will we see this in Mesa 11.3 / 12.0?
That is, do you have an updated version ready for release?

Thanks,
  Dieter
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 4/4] anv/pipeline: Bounds-check resource indices when robust_buffer_access is enabled

2016-05-19 Thread Jason Ekstrand
On May 19, 2016 12:50 AM, "Michael Schellenberger Costa" <
mschellenbergerco...@googlemail.com> wrote:
>
> Hi Jason,
>
> Am 19.05.2016 um 09:22 schrieb Jason Ekstrand:
> > ---
> >  src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 52

> >  1 file changed, 35 insertions(+), 17 deletions(-)
> >
> > diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
> > index 91f4322..7e66149 100644
> > --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
> > +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
> > @@ -29,6 +29,9 @@ struct apply_pipeline_layout_state {
> > nir_shader *shader;
> > nir_builder builder;
> >
> > +   struct anv_pipeline_layout *layout;
> > +   bool add_bounds_checks;
> > +
> > struct {
> >BITSET_WORD *used;
> >uint8_t *surface_offsets;
> > @@ -110,17 +113,15 @@ lower_res_index_intrinsic(nir_intrinsic_instr
*intrin,
> > uint32_t binding = nir_intrinsic_binding(intrin);
> >
> > uint32_t surface_index = state->set[set].surface_offsets[binding];
> > +   uint32_t array_size =
> > +  state->layout->set[set].layout->binding[binding].array_size;
> >
> > -   nir_const_value *const_block_idx =
> > -  nir_src_as_const_value(intrin->src[0]);
> > +   nir_ssa_def *block_index = nir_ssa_for_src(b, intrin->src[0], 1);
> >
> > -   nir_ssa_def *block_index;
> > -   if (const_block_idx) {
> > -  block_index = nir_imm_int(b, surface_index +
const_block_idx->u32[0]);
> > -   } else {
> > -  block_index = nir_iadd(b, nir_imm_int(b, surface_index),
> > - nir_ssa_for_src(b, intrin->src[0], 1));
> > -   }
> > +   if (state->add_bounds_checks)
> > +  block_index = nir_umax(b, block_index, nir_imm_int(b, array_size
- 1));
> > +
> > +   block_index = nir_iadd(b, nir_imm_int(b, surface_index),
block_index);
> Here you do
> |   if (state->add_bounds_checks)
> |   block_index = nir_umax(...);
> |   block_index = nir_iadd(...);
>
> Below you do
> |   nir_ssa_def *index = iadd(...);
> |   if (state->add_bounds_checks)
> |   index = nir_umax(...);
>
> Are both functionally equivalent? Also why do you one time do

No but both are correct.  In the case above, we are computing

resource_index(set, binding, index) = resource_index(set, binding, 0) +
umin(index, bound)

(I just realized I got umin and umax backwards).  In the second case, the
index is split into a base and an indirect so we actually have 3 pieces:
resource_index(set, binding, 0), base_offset, and the indirect.  This means
that instead of the index above we have base+indirect.  Does that make
sense?

> | nir_ssa_def *block_index = nir_ssa_for_src()
>
> and the other time put that directly into nir_iadd()?
>
> Could you unify the code so that both cases are equivalent?
> --Michael
>
>
> >
> > assert(intrin->dest.is_ssa);
> > nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(block_index));
> > @@ -129,16 +130,24 @@ lower_res_index_intrinsic(nir_intrinsic_instr
*intrin,
> >
> >  static void
> >  lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
> > -unsigned *const_index, nir_tex_src_type src_type,
> > +unsigned *const_index, unsigned array_size,
> > +nir_tex_src_type src_type,
> >  struct apply_pipeline_layout_state *state)
> >  {
> > +   nir_builder *b = &state->builder;
> > +
> > if (deref->deref.child) {
> >assert(deref->deref.child->deref_type == nir_deref_type_array);
> >nir_deref_array *deref_array =
nir_deref_as_array(deref->deref.child);
> >
> > -  *const_index += deref_array->base_offset;
> > -
> >if (deref_array->deref_array_type ==
nir_deref_array_type_indirect) {
> > + nir_ssa_def *index =
> > +nir_iadd(b, nir_imm_int(b, deref_array->base_offset),
> > +nir_ssa_for_src(b, deref_array->indirect, 1));
> > +
> > + if (state->add_bounds_checks)
> > +index = nir_umax(b, index, nir_imm_int(b, array_size - 1));
> > +
> >   nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
> > tex->num_srcs + 1);
> >
> > @@ -154,10 +163,11 @@ lower_tex_deref(nir_tex_instr *tex, nir_deref_var
*deref,
> >* first-class texture source.
> >*/
> >   tex->src[tex->num_srcs].src_type = src_type;
> > + nir_instr_rewrite_src(>instr, >src[tex->num_srcs],
> > +   nir_src_for_ssa(index));
> >   tex->num_srcs++;
> > - assert(deref_array->indirect.is_ssa);
> > - nir_instr_rewrite_src(>instr, >src[tex->num_srcs -
1].src,
> > -   deref_array->indirect);
> > +  } else {
> > + *const_index += MIN2(deref_array->base_offset, array_size -
1);
> >}
> > }
> >  }
> > @@ -182,17 +192,23 @@ lower_tex(nir_tex_instr *tex, struct

Re: [Mesa-dev] [PATCH 08/12] nir: Add a simple nir_lower_wpos_center() pass for Vulkan drivers.

2016-05-19 Thread Kenneth Graunke
On Thursday, May 19, 2016 1:21:16 PM PDT Matt Turner wrote:
> On Wed, May 18, 2016 at 3:00 PM, Kenneth Graunke  
wrote:
> > nir_lower_wpos_ytransform() is great for OpenGL, which allows
> > applications to choose whether their coordinate system's origin is
> > upper left/lower left, and whether the pixel center should be on
> > integer/half-integer boundaries.
> >
> > Vulkan, however, has much simpler requirements: the pixel center
> > is always half-integer, and the origin is always upper left.  No
> > coordinate transform is needed - we just need to add <0.5, 0.5>.
> > This means that we can avoid using (and setting up) a uniform.
> >
> > I thought about adding more options to nir_lower_wpos_ytransform(),
> > but making a new pass that never even touched uniforms seemed simpler.
> >
> > Signed-off-by: Kenneth Graunke 
> > ---
> >  src/compiler/Makefile.sources|   1 +
> >  src/compiler/nir/nir.h   |   1 +
> >  src/compiler/nir/nir_lower_wpos_center.c | 107 ++
+
> >  3 files changed, 109 insertions(+)
> >  create mode 100644 src/compiler/nir/nir_lower_wpos_center.c
> >
> > diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> > index 97f9eb4..b8f2b49 100644
> > --- a/src/compiler/Makefile.sources
> > +++ b/src/compiler/Makefile.sources
> > @@ -212,6 +212,7 @@ NIR_FILES = \
> > nir/nir_lower_vars_to_ssa.c \
> > nir/nir_lower_var_copies.c \
> > nir/nir_lower_vec_to_movs.c \
> > +   nir/nir_lower_wpos_center.c \
> > nir/nir_lower_wpos_ytransform.c \
> > nir/nir_metadata.c \
> > nir/nir_move_vec_src_uses_to_dest.c \
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index a21a7bd..78913d3 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2407,6 +2407,7 @@ typedef struct nir_lower_wpos_ytransform_options {
> >
> >  bool nir_lower_wpos_ytransform(nir_shader *shader,
> > const nir_lower_wpos_ytransform_options 
*options);
> > +bool nir_lower_wpos_center(nir_shader *shader);
> >
> >  typedef struct nir_lower_drawpixels_options {
> > int texcoord_state_tokens[5];
> > diff --git a/src/compiler/nir/nir_lower_wpos_center.c b/src/compiler/nir/
nir_lower_wpos_center.c
> > new file mode 100644
> > index 000..d66109d
> > --- /dev/null
> > +++ b/src/compiler/nir/nir_lower_wpos_center.c
> > @@ -0,0 +1,107 @@
> > +/*
> > + * Copyright © 2015 Red Hat
> > + * Copyright © 2016 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining 
a
> > + * copy of this software and associated documentation files (the 
"Software"),
> > + * to deal in the Software without restriction, including without 
limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the 
next
> > + * paragraph) shall be included in all copies or substantial portions of 
the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 
SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "nir.h"
> > +#include "nir_builder.h"
> > +#include "program/prog_instruction.h"
> > +
> > +/**
> > + * This pass adds <0.5, 0.5> to all uses of gl_FragCoord.
> > + *
> > + * Run before nir_lower_io().
> > + *
> > + * For a more full featured pass, consider using 
nir_lower_wpos_ytransform(),
> > + * which can handle pixel center integer / half integer, and origin lower
> > + * left / upper left transformations.
> > + *
> > + * This simple pass is primarily intended for use by Vulkan drivers on
> > + * hardware which provides an integer pixel center.  Vulkan mandates that
> > + * the pixel center must be half-integer, and also that the coordinate
> > + * system's origin must be upper left.  This means that there's no need
> > + * for a uniform - we can always just add a constant.
> > + */
> > +
> > +static void
> > +add_half_to_fragcoord(nir_builder *b, nir_intrinsic_instr *intr)
> > +{
> > +   nir_ssa_def *wpos = &intr->dest.ssa;
> > +
> > +   assert(intr->dest.is_ssa);
> > +
> > +   b->cursor = nir_after_instr(&intr->instr);
> > +
> > +   wpos = nir_fadd(b, wpos, nir_imm_vec4(b, 0.5f, 0.5f, 0.0f, 0.0f));
> > +
> > +   nir_ssa_def_rewrite_uses_after(>dest.ssa, 

Re: [Mesa-dev] [PATCH 07/12] nir: Don't use ffma in nir_lower_wpos_ytransform().

2016-05-19 Thread Kenneth Graunke
On Thursday, May 19, 2016 12:57:44 PM PDT Rob Clark wrote:
> On Wed, May 18, 2016 at 6:00 PM, Kenneth Graunke  
wrote:
> > ffma is an explicitly fused multiply add with higher precision.
> > The optimizer will take care of promoting mul/add to fma when
> > it's beneficial to do so.
> >
> > This fixes failures on Gen4-5 when using this pass, as those platforms
> > don't actually implement fma().
> 
> hmm, we can't rely on the opt-algebraic pass to do this?
> 
> BR,
> -R

We can rely on either nir_opt_algebraic (with the fuse_ffma flag set)
or brw_nir_opt_peephole_ffma() (if someone wants to move it to
src/compiler/nir and use it) to fuse add+mul into ffma.

However, we can't rely on nir_opt_algebraic to split up ffma into
mul+add for us.  We made it stop doing that a little while ago,
so that the GLSL fma() built-in is always higher precision.  (The
thinking is that if apps didn't care, they would just write (a*b+c),
and that splitting fma() is pretty bunk...and splitting and reassembling
so fma() has /inconsistent/ precision is even more bunk...)

I suppose I could just set lower_ffma in i965's nir_compiler_options
for Gen < 6 where we don't have a MAD instruction (and don't support
the GLSL fma() built-in function, either).  That might be more sensible.

--Ken


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Pass nir_src/nir_dest by reference.

2016-05-19 Thread Jason Ekstrand
On May 19, 2016 2:42 PM, "Matt Turner"  wrote:
>
> Cuts 6K of .text.

Nice!

Reviewed-by: Jason Ekstrand 

>textdata bss dec hex filename
> 5772372  264648   29320 6066340  5c90a4 lib/i965_dri.so before
> 5766074  264648   29320 6060042  5c780a lib/i965_dri.so after
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h |  4 ++--
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  4 ++--
>  src/mesa/drivers/dri/i965/brw_vec4.h   | 12 ++--
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 12 ++--
>  4 files changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
b/src/mesa/drivers/dri/i965/brw_fs.h
> index ac270cd..ab79cc8 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -256,8 +256,8 @@ public:
>   nir_tex_instr *instr);
> void nir_emit_jump(const brw::fs_builder &bld,
>nir_jump_instr *instr);
> -   fs_reg get_nir_src(nir_src src);
> -   fs_reg get_nir_dest(nir_dest dest);
> +   fs_reg get_nir_src(const nir_src &src);
> +   fs_reg get_nir_dest(const nir_dest &dest);
> fs_reg get_nir_image_deref(const nir_deref_var *deref);
> fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
> void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index ebcc92a..c12e8ee 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1451,7 +1451,7 @@ fs_visitor::nir_emit_undef(const fs_builder ,
nir_ssa_undef_instr *instr)
>  }
>
>  fs_reg
> -fs_visitor::get_nir_src(nir_src src)
> +fs_visitor::get_nir_src(const nir_src &src)
>  {
> fs_reg reg;
> if (src.is_ssa) {
> @@ -1471,7 +1471,7 @@ fs_visitor::get_nir_src(nir_src src)
>  }
>
>  fs_reg
> -fs_visitor::get_nir_dest(nir_dest dest)
> +fs_visitor::get_nir_dest(const nir_dest &dest)
>  {
> if (dest.is_ssa) {
>const brw_reg_type reg_type =
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h
b/src/mesa/drivers/dri/i965/brw_vec4.h
> index bc54eaf..76dea04 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -326,14 +326,14 @@ public:
> virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
> virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
>
> -   dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
> -   dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
> -   dst_reg get_nir_dest(nir_dest dest);
> -   src_reg get_nir_src(nir_src src, enum brw_reg_type type,
> +   dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
> +   dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
> +   dst_reg get_nir_dest(const nir_dest &dest);
> +   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
> unsigned num_components = 4);
> -   src_reg get_nir_src(nir_src src, nir_alu_type type,
> +   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
> unsigned num_components = 4);
> -   src_reg get_nir_src(nir_src src,
> +   src_reg get_nir_src(const nir_src &src,
> unsigned num_components = 4);
> src_reg get_indirect_offset(nir_intrinsic_instr *instr);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index 29f52fa..f3b4528 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -267,7 +267,7 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register
*nir_reg,
>  }
>
>  dst_reg
> -vec4_visitor::get_nir_dest(nir_dest dest)
> +vec4_visitor::get_nir_dest(const nir_dest &dest)
>  {
> if (dest.is_ssa) {
>dst_reg dst = dst_reg(VGRF, alloc.allocate(1));
> @@ -280,19 +280,19 @@ vec4_visitor::get_nir_dest(nir_dest dest)
>  }
>
>  dst_reg
> -vec4_visitor::get_nir_dest(nir_dest dest, enum brw_reg_type type)
> +vec4_visitor::get_nir_dest(const nir_dest &dest, enum brw_reg_type type)
>  {
> return retype(get_nir_dest(dest), type);
>  }
>
>  dst_reg
> -vec4_visitor::get_nir_dest(nir_dest dest, nir_alu_type type)
> +vec4_visitor::get_nir_dest(const nir_dest &dest, nir_alu_type type)
>  {
> return get_nir_dest(dest, brw_type_for_nir_type(type));
>  }
>
>  src_reg
> -vec4_visitor::get_nir_src(nir_src src, enum brw_reg_type type,
> +vec4_visitor::get_nir_src(const nir_src &src, enum brw_reg_type type,
>unsigned num_components)
>  {
> dst_reg reg;
> @@ -314,14 +314,14 @@ vec4_visitor::get_nir_src(nir_src src, enum
brw_reg_type type,
>  }
>
>  src_reg
> -vec4_visitor::get_nir_src(nir_src src, nir_alu_type type,
> +vec4_visitor::get_nir_src(const nir_src &src, nir_alu_type type,
>unsigned num_components)
>  {
> return get_nir_src(src, brw_type_for_nir_type(type), num_components);
>  }
>
>  src_reg

[Mesa-dev] [PATCH v2 08/13] i965/draw: Use the real size for vertex buffers

2016-05-19 Thread Jason Ekstrand
Previously, we were using the size of the BO which may be substantially
larger than the actual vertex buffer size.

v2: Use actual tight bounds on Bay Trail and Haswell+
---
 src/mesa/drivers/dri/i965/brw_context.h  |  1 +
 src/mesa/drivers/dri/i965/brw_draw_upload.c  | 16 +++-
 src/mesa/drivers/dri/i965/gen8_draw_upload.c |  2 +-
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 76ed1de..7a3afc9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -569,6 +569,7 @@ struct brw_vertex_buffer {
/** Buffer object containing the uploaded vertex data */
drm_intel_bo *bo;
uint32_t offset;
+   uint32_t size;
/** Byte stride between elements in the uploaded array */
GLuint stride;
GLuint step_rate;
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index d9e46ec..fd6ea8c 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -411,6 +411,7 @@ copy_array_to_vbo_array(struct brw_context *brw,
>bo, >offset);
 
   buffer->stride = 0;
+  buffer->size = element->glarray->_ElementSize;
   return;
}
 
@@ -430,6 +431,7 @@ copy_array_to_vbo_array(struct brw_context *brw,
   }
}
buffer->stride = dst_stride;
+   buffer->size = size;
 }
 
 void
@@ -606,6 +608,8 @@ brw_prepare_vertices(struct brw_context *brw)
 
   buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, 
range);
   drm_intel_bo_reference(buffer->bo);
+
+  buffer->size = start + range;
}
 
/* If we need to upload all the arrays, then we can trim those arrays to
@@ -629,6 +633,7 @@ brw_prepare_vertices(struct brw_context *brw)
 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
 buffer, interleaved);
 buffer->offset -= delta * interleaved;
+ buffer->size += delta * interleaved;
 
 for (i = 0; i < nr_uploads; i++) {
/* Then, just point upload[i] at upload[0]'s buffer. */
@@ -658,6 +663,7 @@ brw_prepare_vertices(struct brw_context *brw)
  buffer, upload[i]->glarray->_ElementSize);
   }
   buffer->offset -= delta * buffer->stride;
+  buffer->size += delta * buffer->stride;
   buffer->step_rate = upload[i]->glarray->InstanceDivisor;
   upload[i]->buffer = j++;
   upload[i]->offset = 0;
@@ -799,7 +805,15 @@ brw_emit_vertices(struct brw_context *brw)
   OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
   for (i = 0; i < brw->vb.nr_buffers; i++) {
 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
- EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->bo->size - 1,
+ /* Prior to Haswell and Bay Trail we have to use 4-component formats
+  * to fake 3-component ones.  In particular, we do this for
+  * half-float and 8 and 16-bit integer formats.  This means that the
+  * vertex element may poke over the end of the buffer by 2 bytes.
+  */
+ unsigned padding =
+(brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2;
+ EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo,
+  buffer->offset + buffer->size + padding - 1,
   buffer->offset, buffer->stride,
   buffer->step_rate);
 
diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c 
b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
index dce11dd..4bb3a59 100644
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -151,7 +151,7 @@ gen8_emit_vertices(struct brw_context *brw)
 
  OUT_BATCH(dw0);
  OUT_RELOC64(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
- OUT_BATCH(buffer->bo->size);
+ OUT_BATCH(buffer->size);
   }
 
   if (uses_draw_params) {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7.9/13] i965/draw: Use 3-channel formats for vertex fetch when possible.

2016-05-19 Thread Jason Ekstrand
For a long time, several of the 3-channel vertex formats didn't exist so we
faked them with 4-channel versions.  Starting with Sandy Bridge, we can use
R16G16B16_FLOAT and 8 and 16-bit integer formats become available on
Haswell and Bay Trail.
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 48 ++---
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index b651fd2..d9e46ec 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -65,7 +65,7 @@ static const GLuint half_float_types[5] = {
0,
BRW_SURFACEFORMAT_R16_FLOAT,
BRW_SURFACEFORMAT_R16G16_FLOAT,
-   BRW_SURFACEFORMAT_R16G16B16A16_FLOAT,
+   BRW_SURFACEFORMAT_R16G16B16_FLOAT,
BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
 };
 
@@ -129,7 +129,7 @@ static const GLuint ushort_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R16_UINT,
BRW_SURFACEFORMAT_R16G16_UINT,
-   BRW_SURFACEFORMAT_R16G16B16A16_UINT,
+   BRW_SURFACEFORMAT_R16G16B16_UINT,
BRW_SURFACEFORMAT_R16G16B16A16_UINT
 };
 
@@ -153,7 +153,7 @@ static const GLuint short_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R16_SINT,
BRW_SURFACEFORMAT_R16G16_SINT,
-   BRW_SURFACEFORMAT_R16G16B16A16_SINT,
+   BRW_SURFACEFORMAT_R16G16B16_SINT,
BRW_SURFACEFORMAT_R16G16B16A16_SINT
 };
 
@@ -177,7 +177,7 @@ static const GLuint ubyte_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R8_UINT,
BRW_SURFACEFORMAT_R8G8_UINT,
-   BRW_SURFACEFORMAT_R8G8B8A8_UINT,
+   BRW_SURFACEFORMAT_R8G8B8_UINT,
BRW_SURFACEFORMAT_R8G8B8A8_UINT
 };
 
@@ -201,7 +201,7 @@ static const GLuint byte_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R8_SINT,
BRW_SURFACEFORMAT_R8G8_SINT,
-   BRW_SURFACEFORMAT_R8G8B8A8_SINT,
+   BRW_SURFACEFORMAT_R8G8B8_SINT,
BRW_SURFACEFORMAT_R8G8B8A8_SINT
 };
 
@@ -248,6 +248,8 @@ brw_get_vertex_surface_type(struct brw_context *brw,
 const struct gl_client_array *glarray)
 {
int size = glarray->Size;
+   const bool is_ivybridge_or_older =
+  brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell;
 
if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
   fprintf(stderr, "type %s size %d normalized %d\n",
@@ -258,11 +260,27 @@ brw_get_vertex_surface_type(struct brw_context *brw,
   assert(glarray->Format == GL_RGBA); /* sanity check */
   switch (glarray->Type) {
   case GL_INT: return int_types_direct[size];
-  case GL_SHORT: return short_types_direct[size];
-  case GL_BYTE: return byte_types_direct[size];
+  case GL_SHORT:
+ if (is_ivybridge_or_older && size == 3)
+return short_types_direct[4];
+ else
+return short_types_direct[size];
+  case GL_BYTE:
+ if (is_ivybridge_or_older && size == 3)
+return byte_types_direct[4];
+ else
+return byte_types_direct[size];
   case GL_UNSIGNED_INT: return uint_types_direct[size];
-  case GL_UNSIGNED_SHORT: return ushort_types_direct[size];
-  case GL_UNSIGNED_BYTE: return ubyte_types_direct[size];
+  case GL_UNSIGNED_SHORT:
+ if (is_ivybridge_or_older && size == 3)
+return ushort_types_direct[4];
+ else
+return ushort_types_direct[size];
+  case GL_UNSIGNED_BYTE:
+ if (is_ivybridge_or_older && size == 3)
+return ubyte_types_direct[4];
+ else
+return ubyte_types_direct[size];
   default: unreachable("not reached");
   }
} else if (glarray->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
@@ -271,7 +289,11 @@ brw_get_vertex_surface_type(struct brw_context *brw,
   switch (glarray->Type) {
   case GL_DOUBLE: return double_types(brw, size, glarray->Doubles);
   case GL_FLOAT: return float_types[size];
-  case GL_HALF_FLOAT: return half_float_types[size];
+  case GL_HALF_FLOAT:
+ if (brw->gen < 6 && size == 3)
+return half_float_types[4];
+ else
+return half_float_types[size];
   case GL_INT: return int_types_norm[size];
   case GL_SHORT: return short_types_norm[size];
   case GL_BYTE: return byte_types_norm[size];
@@ -345,7 +367,11 @@ brw_get_vertex_surface_type(struct brw_context *brw,
   switch (glarray->Type) {
   case GL_DOUBLE: return double_types(brw, size, glarray->Doubles);
   case GL_FLOAT: return float_types[size];
-  case GL_HALF_FLOAT: return half_float_types[size];
+  case GL_HALF_FLOAT:
+ if (brw->gen < 6 && size == 3)
+return half_float_types[4];
+ else
+return half_float_types[size];
   case GL_INT: return int_types_scale[size];
   case GL_SHORT: return short_types_scale[size];
   case GL_BYTE: return byte_types_scale[size];
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH v2 13/13] i965: Enable arb_robust_buffer_access_behavior on BYT and HSW+

2016-05-19 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 8b4f685..511ef5c 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -369,6 +369,9 @@ intelInitExtensions(struct gl_context *ctx)
   }
}
 
+   if (brw->gen >= 8 || brw->is_haswell || brw->is_baytrail)
+  ctx->Extensions.ARB_robust_buffer_access_behavior = true;
+
if (brw->intelScreen->has_mi_math_and_lrr) {
   ctx->Extensions.ARB_query_buffer_object = true;
}
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7.8/13] i965/surface_formats: Update the VB column for new formats added on BYT

2016-05-19 Thread Jason Ekstrand
Bay Trail and Haswell added a bunch of new vertex formats.  There was also
the addition of 64-bit passthrough formats for BDW+.
---
 src/mesa/drivers/dri/i965/brw_surface_formats.c | 32 -
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c 
b/src/mesa/drivers/dri/i965/brw_surface_formats.c
index 16667b9..750f37a 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -274,26 +274,26 @@ const struct brw_surface_format_info surface_formats[] = {
SF(80, 80,  x,  x,  x,  x,  x,  x,  x,x,   EAC_SIGNED_R11)
SF(80, 80,  x,  x,  x,  x,  x,  x,  x,x,   EAC_SIGNED_RG11)
SF(80, 80,  x,  x,  x,  x,  x,  x,  x,x,   ETC2_SRGB8)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R16G16B16_UINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R16G16B16_SINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R32_SFIXED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R10G10B10A2_SNORM)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R10G10B10A2_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R10G10B10A2_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R10G10B10A2_SINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   B10G10R10A2_SNORM)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   B10G10R10A2_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   B10G10R10A2_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   B10G10R10A2_UINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   B10G10R10A2_SINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R64G64B64A64_PASSTHRU)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R64G64B64_PASSTHRU)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R16G16B16_UINT)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R16G16B16_SINT)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R32_SFIXED)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R10G10B10A2_SNORM)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R10G10B10A2_USCALED)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R10G10B10A2_SSCALED)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R10G10B10A2_SINT)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   B10G10R10A2_SNORM)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   B10G10R10A2_USCALED)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   B10G10R10A2_SSCALED)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   B10G10R10A2_UINT)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   B10G10R10A2_SINT)
+   SF( x,  x,  x,  x,  x,  x, 80,  x,  x,x,   R64G64B64A64_PASSTHRU)
+   SF( x,  x,  x,  x,  x,  x, 80,  x,  x,x,   R64G64B64_PASSTHRU)
SF(80, 80,  x,  x,  x,  x,  x,  x,  x,x,   ETC2_RGB8_PTA)
SF(80, 80,  x,  x,  x,  x,  x,  x,  x,x,   ETC2_SRGB8_PTA)
SF(80, 80,  x,  x,  x,  x,  x,  x,  x,x,   ETC2_EAC_RGBA8)
SF(80, 80,  x,  x,  x,  x,  x,  x,  x,x,   ETC2_EAC_SRGB8_A8)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R8G8B8_UINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R8G8B8_SINT)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R8G8B8_UINT)
+   SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R8G8B8_SINT)
SF(80, 80,  x,  x,  x,  x,  x,  x,  x,x,   ASTC_LDR_2D_4x4_FLT16)
SF(80, 80,  x,  x,  x,  x,  x,  x,  x,x,   ASTC_LDR_2D_5x4_FLT16)
SF(80, 80,  x,  x,  x,  x,  x,  x,  x,x,   ASTC_LDR_2D_5x5_FLT16)
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] tilers and out-of-order rendering..

2016-05-19 Thread Rob Clark
On Thu, May 19, 2016 at 6:21 PM, Eric Anholt  wrote:
> Rob Clark  writes:
>
>> So some rendering patterns that I've seen in apps turn out to be
>> somewhat evil for tiling gpu's.. couple cases I've seen:
>>
>> 1) stk has some silliness where it binds an fbo, clears, binds other
>> fbo clears, binds previous fbo and draws, and so on.  This one is
>> probably not too hard to just fix in stk.
>>
>> 2) I've seen a render pattern in manhattan where app does a bunch of
>> texture uploads mid-frame via a pbo (and then generates mipmap levels
>> for the updated texture, which hits the blit path which changes fb
>> state and forces a flush).  This one probably not something that can
>> be fixed in the app ;-)
>>
>> There are probably other cases where this comes up which I haven't
>> noticed yet.  I'm not entirely sure how common the pattern that I see
>> in manhattan is.
>>
>> At one point, Eric Anholt mentioned the idea of tracking rendering
>> cmdstream per render-target, as well as dependency information between
>> these different sets of cmdstream (if you render to one fbo, then turn
>> around and sample from it, the rendering needs to happen before the
>> sampling).  I've been thinking a bit about how this would actually
>> work, and trying to do some experiments to get an idea about how
>> useful this would be.
>
> My plan was pretty much what you laid out here, except I was going to
> just map to my CL struct with a little hash table from the FB state
> members since FB state isn't a CSO.

ok, yeah, I guess that solves the naming conflict (fd_batch(_state)
sounds nicer for what its purpose really is than
fd_framebuffer_state)

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: fix SUSTx constraints on Kepler

2016-05-19 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

I actually think this is wrong for SUSTB, but ... meh.

Also, since we know the format, we could avoid computing the colors
that aren't present in the image, but again ... meh.

On Thu, May 19, 2016 at 6:52 PM, Samuel Pitoiset
 wrote:
> To prevent out-of-bounds access and format mismatch we add a predicate
> on sustp, but we have to account for it when the sources are condensed
> because a predicate is a source. Using the range 3:6 will only condense
> the input data and it's always the case. This also fixes constraints
> when an indirect access is used.
>
> This fixes a rendering issue in the realistic rendering demo from UE4.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> index b893996..f6277d8 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> @@ -2126,9 +2126,7 @@ 
> RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
> condenseDefs(tex);
>
> if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) {
> -  int n = tex->srcCount(0xff);
> -  if (n > 4)
> - condenseSrcs(tex, 3, n - 1);
> +  condenseSrcs(tex, 3, 6);
> } else
> if (isTextureOp(tex->op)) {
>int n = tex->srcCount(0xff, true);
> --
> 2.8.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: fix SUSTx constraints on Kepler

2016-05-19 Thread Samuel Pitoiset
To prevent out-of-bounds access and format mismatch we add a predicate
on sustp, but we have to account for it when the sources are condensed
because a predicate is a source. Using the range 3:6 will only condense
the input data and it's always the case. This also fixes constraints
when an indirect access is used.

This fixes a rendering issue in the realistic rendering demo from UE4.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index b893996..f6277d8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -2126,9 +2126,7 @@ 
RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
condenseDefs(tex);
 
if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) {
-  int n = tex->srcCount(0xff);
-  if (n > 4)
- condenseSrcs(tex, 3, n - 1);
+  condenseSrcs(tex, 3, 6);
} else
if (isTextureOp(tex->op)) {
   int n = tex->srcCount(0xff, true);
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] tilers and out-of-order rendering..

2016-05-19 Thread Eric Anholt
Rob Clark  writes:

> So some rendering patterns that I've seen in apps turn out to be
> somewhat evil for tiling gpu's.. couple cases I've seen:
>
> 1) stk has some silliness where it binds an fbo, clears, binds other
> fbo clears, binds previous fbo and draws, and so on.  This one is
> probably not too hard to just fix in stk.
>
> 2) I've seen a render pattern in manhattan where app does a bunch of
> texture uploads mid-frame via a pbo (and then generates mipmap levels
> for the updated texture, which hits the blit path which changes fb
> state and forces a flush).  This one probably not something that can
> be fixed in the app ;-)
>
> There are probably other cases where this comes up which I haven't
> noticed yet.  I'm not entirely sure how common the pattern that I see
> in manhattan is.
>
> At one point, Eric Anholt mentioned the idea of tracking rendering
> cmdstream per render-target, as well as dependency information between
> these different sets of cmdstream (if you render to one fbo, then turn
> around and sample from it, the rendering needs to happen before the
> sampling).  I've been thinking a bit about how this would actually
> work, and trying to do some experiments to get an idea about how
> useful this would be.

My plan was pretty much what you laid out here, except I was going to
just map to my CL struct with a little hash table from the FB state
members since FB state isn't a CSO.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: handle same struct redeclaration (v2)

2016-05-19 Thread Dave Airlie
From: Dave Airlie 

This works around a bug in older versions of UE4, where a shader
defines the same structure twice. Although we aren't sure this is correct
GLSL (it most likely isn't) there are enough UE4 based things out there
we should deal with this.

This drops the error to a warning if the struct names and contents match.

v1.1: do better C++ on record_compare declaration (Rob)
v2: restrict this to desktop GL only (Ian)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95005
Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ast_to_hir.cpp | 7 ++-
 src/compiler/glsl_types.cpp  | 4 ++--
 src/compiler/glsl_types.h| 4 +++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index b4c6de2..ecfe684 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -6918,7 +6918,12 @@ ast_struct_specifier::hir(exec_list *instructions,
   glsl_type::get_record_instance(fields, decl_count, this->name);
 
if (!state->symbols->add_type(name, t)) {
-  _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name);
+  const glsl_type *match = state->symbols->get_type(name);
+  /* allow struct matching for desktop GL - older UE4 does this */
+  if (state->is_version(130, 0) && match->record_compare(t, false))
+ _mesa_glsl_warning(& loc, state, "struct `%s' previously defined", 
name);
+  else
+ _mesa_glsl_error(& loc, state, "struct `%s' previously defined", 
name);
} else {
   const glsl_type **s = reralloc(state, state->user_structures,
  const glsl_type *,
diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index c058283..11f1e85 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -856,7 +856,7 @@ glsl_type::get_array_instance(const glsl_type *base, 
unsigned array_size)
 
 
 bool
-glsl_type::record_compare(const glsl_type *b) const
+glsl_type::record_compare(const glsl_type *b, bool match_locations) const
 {
if (this->length != b->length)
   return false;
@@ -887,7 +887,7 @@ glsl_type::record_compare(const glsl_type *b) const
   if (this->fields.structure[i].matrix_layout
  != b->fields.structure[i].matrix_layout)
 return false;
-  if (this->fields.structure[i].location
+  if (match_locations && this->fields.structure[i].location
   != b->fields.structure[i].location)
  return false;
   if (this->fields.structure[i].offset
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index a47b0ff..7f9e318 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -740,8 +740,10 @@ struct glsl_type {
 * Compare a record type against another record type.
 *
 * This is useful for matching record types declared across shader stages.
+* The option to not match locations is to deal with places where the
+* same struct is defined in a block which has a location set on it.
 */
-   bool record_compare(const glsl_type *b) const;
+   bool record_compare(const glsl_type *b, bool match_locations = true) const;
 
 private:
 
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/tgsi: use _mesa_roundevenf in micro_rnd

2016-05-19 Thread Matt Turner
On Thu, May 19, 2016 at 2:34 PM, Lars Hamre  wrote:
> Fixes the following piglit tests (for softpipe):
>
> /spec/glsl-1.30/execution/built-in-functions/...
> fs-roundeven-float
> fs-roundeven-vec2
> fs-roundeven-vec3
> fs-roundeven-vec4
> vs-roundeven-float
> vs-roundeven-vec2
> vs-roundeven-vec3
> vs-roundeven-vec4
>
> /spec/glsl-1.50/execution/built-in-functions/...
> gs-roundeven-float
> gs-roundeven-vec2
> gs-roundeven-vec3
> gs-roundeven-vec4
>
> Signed-off-by: Lars Hamre 
>
> ---
>
> Note: someone with access will need to commit this
>   after the review process.

I'm not going to commit it myself because I don't work on a Gallium
driver, but I'm very glad to see the patch.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/13] glsl: Add an option to clamp block indices when lowering UBO/SSBOs

2016-05-19 Thread Jason Ekstrand
On Thu, May 19, 2016 at 10:07 AM, Ian Romanick  wrote:

> So... what did we decide for arrays of atomic counters?  Do we need an
> extra pass for that or ... ?
>
> Also... how does this handle the possibly unsized (actually
> draw-time-sized) array at the end of an SSBO?
>

This patch only matters for the block index not the offset into the
buffer.  For atomics, the "block index" isn't allowed to be indirected
(Maybe they don't even have one?).  For UBOs and SSBOs, we set the correct
buffer size in the surface state so the hardware won't let it go outside.
This should also work for SSBO unsized arrays.


> For UBOs, I think this patch is definitely sufficient, and I think it
> improves things quite a lot for SSBOs.  We may need some more, but this
> patch is
>
> Reviewed-by: Ian Romanick 
>
> On 05/19/2016 12:21 AM, Jason Ekstrand wrote:
> > This prevents array overflow when the block is actually an array of UBOs
> or
> > SSBOs.  On some hardware such as i965, such overflows can cause GPU
> hangs.
> > ---
> >  src/compiler/glsl/ir_optimization.h   |  2 +-
> >  src/compiler/glsl/linker.cpp  |  3 ++-
> >  src/compiler/glsl/lower_ubo_reference.cpp | 36
> +++
> >  src/mesa/drivers/dri/i965/brw_compiler.c  |  1 +
> >  src/mesa/main/mtypes.h|  3 +++
> >  5 files changed, 39 insertions(+), 6 deletions(-)
> >
> > diff --git a/src/compiler/glsl/ir_optimization.h
> b/src/compiler/glsl/ir_optimization.h
> > index 5fc2740..4afa37e 100644
> > --- a/src/compiler/glsl/ir_optimization.h
> > +++ b/src/compiler/glsl/ir_optimization.h
> > @@ -123,7 +123,7 @@ bool lower_clip_distance(gl_shader *shader);
> >  void lower_output_reads(unsigned stage, exec_list *instructions);
> >  bool lower_packing_builtins(exec_list *instructions, int op_mask);
> >  void lower_shared_reference(struct gl_shader *shader, unsigned
> *shared_size);
> > -void lower_ubo_reference(struct gl_shader *shader);
> > +void lower_ubo_reference(struct gl_shader *shader, bool
> clamp_block_indices);
> >  void lower_packed_varyings(void *mem_ctx,
> > unsigned locations_used, ir_variable_mode
> mode,
> > unsigned gs_input_vertices, gl_shader
> *shader,
> > diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> > index 71a71df..07c8263 100644
> > --- a/src/compiler/glsl/linker.cpp
> > +++ b/src/compiler/glsl/linker.cpp
> > @@ -4879,7 +4879,8 @@ link_shaders(struct gl_context *ctx, struct
> gl_shader_program *prog)
> >   >Const.ShaderCompilerOptions[i];
> >
> >if (options->LowerBufferInterfaceBlocks)
> > - lower_ubo_reference(prog->_LinkedShaders[i]);
> > + lower_ubo_reference(prog->_LinkedShaders[i],
> > + options->ClampBlockIndicesToArrayBounds);
> >
> >if (options->LowerShaderSharedVariables)
> >   lower_shared_reference(prog->_LinkedShaders[i],
> > diff --git a/src/compiler/glsl/lower_ubo_reference.cpp
> b/src/compiler/glsl/lower_ubo_reference.cpp
> > index 1a0140f..749deed 100644
> > --- a/src/compiler/glsl/lower_ubo_reference.cpp
> > +++ b/src/compiler/glsl/lower_ubo_reference.cpp
> > @@ -44,8 +44,10 @@ namespace {
> >  class lower_ubo_reference_visitor :
> >public lower_buffer_access::lower_buffer_access {
> >  public:
> > -   lower_ubo_reference_visitor(struct gl_shader *shader)
> > -   : shader(shader), struct_field(NULL), variable(NULL)
> > +   lower_ubo_reference_visitor(struct gl_shader *shader,
> > +   bool clamp_block_indices)
> > +   : shader(shader), clamp_block_indices(clamp_block_indices),
> > + struct_field(NULL), variable(NULL)
> > {
> > }
> >
> > @@ -104,6 +106,7 @@ public:
> > ir_visitor_status visit_enter(ir_call *ir);
> >
> > struct gl_shader *shader;
> > +   bool clamp_block_indices;
> > struct gl_uniform_buffer_variable *ubo_var;
> > const struct glsl_struct_field *struct_field;
> > ir_variable *variable;
> > @@ -242,6 +245,26 @@ interface_field_name(void *mem_ctx, char
> *base_name, ir_rvalue *d,
> > return NULL;
> >  }
> >
> > +static ir_rvalue *
> > +clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type
> *type)
> > +{
> > +   assert(type->is_array());
> > +
> > +   const unsigned array_size = type->arrays_of_arrays_size();
> > +
> > +   ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
> > +   max_index->type = index->type;
> > +
> > +   ir_constant *zero = new(mem_ctx) ir_constant(0);
> > +   zero->type = index->type;
> > +
> > +   if (index->type->base_type == GLSL_TYPE_INT)
> > +  index = max2(index, zero);
> > +   index = min2(index, max_index);
> > +
> > +   return index;
> > +}
> > +
> >  void
> >  lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
> >   ir_variable *var,
> > @@ -258,6 

[Mesa-dev] [PATCH] i965: Pass nir_src/nir_dest by reference.

2016-05-19 Thread Matt Turner
Cuts 6K of .text.

   textdata bss dec hex filename
5772372  264648   29320 6066340  5c90a4 lib/i965_dri.so before
5766074  264648   29320 6060042  5c780a lib/i965_dri.so after
---
 src/mesa/drivers/dri/i965/brw_fs.h |  4 ++--
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  4 ++--
 src/mesa/drivers/dri/i965/brw_vec4.h   | 12 ++--
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 12 ++--
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index ac270cd..ab79cc8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -256,8 +256,8 @@ public:
  nir_tex_instr *instr);
void nir_emit_jump(const brw::fs_builder ,
   nir_jump_instr *instr);
-   fs_reg get_nir_src(nir_src src);
-   fs_reg get_nir_dest(nir_dest dest);
+   fs_reg get_nir_src(const nir_src &src);
+   fs_reg get_nir_dest(const nir_dest &dest);
fs_reg get_nir_image_deref(const nir_deref_var *deref);
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
void emit_percomp(const brw::fs_builder , const fs_inst ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index ebcc92a..c12e8ee 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1451,7 +1451,7 @@ fs_visitor::nir_emit_undef(const fs_builder , 
nir_ssa_undef_instr *instr)
 }
 
 fs_reg
-fs_visitor::get_nir_src(nir_src src)
+fs_visitor::get_nir_src(const nir_src &src)
 {
fs_reg reg;
if (src.is_ssa) {
@@ -1471,7 +1471,7 @@ fs_visitor::get_nir_src(nir_src src)
 }
 
 fs_reg
-fs_visitor::get_nir_dest(nir_dest dest)
+fs_visitor::get_nir_dest(const nir_dest &dest)
 {
if (dest.is_ssa) {
   const brw_reg_type reg_type =
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index bc54eaf..76dea04 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -326,14 +326,14 @@ public:
virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
 
-   dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
-   dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
-   dst_reg get_nir_dest(nir_dest dest);
-   src_reg get_nir_src(nir_src src, enum brw_reg_type type,
+   dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
+   dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
+   dst_reg get_nir_dest(const nir_dest &dest);
+   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
unsigned num_components = 4);
-   src_reg get_nir_src(nir_src src, nir_alu_type type,
+   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
unsigned num_components = 4);
-   src_reg get_nir_src(nir_src src,
+   src_reg get_nir_src(const nir_src &src,
unsigned num_components = 4);
src_reg get_indirect_offset(nir_intrinsic_instr *instr);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 29f52fa..f3b4528 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -267,7 +267,7 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
 }
 
 dst_reg
-vec4_visitor::get_nir_dest(nir_dest dest)
+vec4_visitor::get_nir_dest(const nir_dest &dest)
 {
if (dest.is_ssa) {
   dst_reg dst = dst_reg(VGRF, alloc.allocate(1));
@@ -280,19 +280,19 @@ vec4_visitor::get_nir_dest(nir_dest dest)
 }
 
 dst_reg
-vec4_visitor::get_nir_dest(nir_dest dest, enum brw_reg_type type)
+vec4_visitor::get_nir_dest(const nir_dest &dest, enum brw_reg_type type)
 {
return retype(get_nir_dest(dest), type);
 }
 
 dst_reg
-vec4_visitor::get_nir_dest(nir_dest dest, nir_alu_type type)
+vec4_visitor::get_nir_dest(const nir_dest &dest, nir_alu_type type)
 {
return get_nir_dest(dest, brw_type_for_nir_type(type));
 }
 
 src_reg
-vec4_visitor::get_nir_src(nir_src src, enum brw_reg_type type,
+vec4_visitor::get_nir_src(const nir_src &src, enum brw_reg_type type,
   unsigned num_components)
 {
dst_reg reg;
@@ -314,14 +314,14 @@ vec4_visitor::get_nir_src(nir_src src, enum brw_reg_type 
type,
 }
 
 src_reg
-vec4_visitor::get_nir_src(nir_src src, nir_alu_type type,
+vec4_visitor::get_nir_src(const nir_src &src, nir_alu_type type,
   unsigned num_components)
 {
return get_nir_src(src, brw_type_for_nir_type(type), num_components);
 }
 
 src_reg
-vec4_visitor::get_nir_src(nir_src src, unsigned num_components)
+vec4_visitor::get_nir_src(const nir_src &src, unsigned num_components)
 {
/* if type is not specified, default to signed int */
return get_nir_src(src, nir_type_int, num_components);
-- 
2.7.3

___
mesa-dev mailing list

[Mesa-dev] [PATCH] gallium/tgsi: use _mesa_roundevenf in micro_rnd

2016-05-19 Thread Lars Hamre
Fixes the following piglit tests (for softpipe):

/spec/glsl-1.30/execution/built-in-functions/...
fs-roundeven-float
fs-roundeven-vec2
fs-roundeven-vec3
fs-roundeven-vec4
vs-roundeven-float
vs-roundeven-vec2
vs-roundeven-vec3
vs-roundeven-vec4

/spec/glsl-1.50/execution/built-in-functions/...
gs-roundeven-float
gs-roundeven-vec2
gs-roundeven-vec3
gs-roundeven-vec4

Signed-off-by: Lars Hamre 

---

Note: someone with access will need to commit this
  after the review process.

 src/gallium/auxiliary/tgsi/tgsi_exec.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index d483429..baf4a89 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -61,6 +61,7 @@
 #include "util/u_half.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/rounding.h"


 #define DEBUG_EXECUTION 0
@@ -543,10 +544,10 @@ static void
 micro_rnd(union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src)
 {
-   dst->f[0] = floorf(src->f[0] + 0.5f);
-   dst->f[1] = floorf(src->f[1] + 0.5f);
-   dst->f[2] = floorf(src->f[2] + 0.5f);
-   dst->f[3] = floorf(src->f[3] + 0.5f);
+   dst->f[0] = _mesa_roundevenf(src->f[0]);
+   dst->f[1] = _mesa_roundevenf(src->f[1]);
+   dst->f[2] = _mesa_roundevenf(src->f[2]);
+   dst->f[3] = _mesa_roundevenf(src->f[3]);
 }

 static void
--
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/13] i965/draw: Delay when we get the bo for vertex buffers

2016-05-19 Thread Jason Ekstrand
On Thu, May 19, 2016 at 7:48 AM, Iago Toral  wrote:

> On Thu, 2016-05-19 at 00:21 -0700, Jason Ekstrand wrote:
> > The previous code got the BO the first time we encountered it.  However,
> > this can potentially lead to problems if the BO is used for multiple
> arrays
> > with the same buffer object because the range we declare as busy may not
> be
> > quite right.  By delaying the call to intel_bufferobj_buffer, we can
> ensure
> > that we have the full range for the given buffer.
> >
> > Cc: "10.2" 
> > ---
> >  src/mesa/drivers/dri/i965/brw_draw_upload.c | 71
> -
> >  1 file changed, 49 insertions(+), 22 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> > index 3ec37f8..0a7725d 100644
> > --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> > +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> > @@ -453,6 +453,11 @@ brw_prepare_vertices(struct brw_context *brw)
> > if (brw->vb.nr_buffers)
> >return;
> >
> > +   /* The range of data in a given buffer represented as [min, max) */
> > +   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
> > +   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
> > +   uint32_t buffer_range_end[VERT_ATTRIB_MAX];
> > +
> > for (i = j = 0; i < brw->vb.nr_enabled; i++) {
> >struct brw_vertex_element *input = brw->vb.enabled[i];
> >const struct gl_client_array *glarray = input->glarray;
> > @@ -460,12 +465,31 @@ brw_prepare_vertices(struct brw_context *brw)
> >if (_mesa_is_bufferobj(glarray->BufferObj)) {
> >struct intel_buffer_object *intel_buffer =
> >   intel_buffer_object(glarray->BufferObj);
> > -  unsigned k;
> > +
> > + const uint32_t offset = (uintptr_t)glarray->Ptr;
>
> Should we use uint64_t instead or do we know that these offsets need to
> be within a 32-bit address?
>

I think they do need to be within 32 bits at the moment because we use 32
bits everywhere.  Maybe on BDW+ we should do 64 bits but I think that's a
separate patch.


>
> > + uint32_t start, range;
> > + if (glarray->InstanceDivisor) {
> > +start = offset;
> > +range = (glarray->StrideB * ((brw->num_instances /
> > + glarray->InstanceDivisor) - 1)
> +
> > + glarray->_ElementSize);
> > + } else {
> > +if (!brw->vb.index_bounds_valid) {
> > +   start = 0;
> > +   range = intel_buffer->Base.Size;
> > +} else {
> > +   start = offset + min_index * glarray->StrideB;
> > +   range = (glarray->StrideB * (max_index - min_index) +
> > +glarray->_ElementSize);
> > +}
> > + }
> >
> >/* If we have a VB set to be uploaded for this buffer object
> > * already, reuse that VB state so that we emit fewer
> > * relocations.
> > */
> > +  unsigned k;
> >for (k = 0; k < i; k++) {
> >   const struct gl_client_array *other =
> brw->vb.enabled[k]->glarray;
> >   if (glarray->BufferObj == other->BufferObj &&
> > @@ -475,6 +499,9 @@ brw_prepare_vertices(struct brw_context *brw)
> >   {
> >  input->buffer = brw->vb.enabled[k]->buffer;
> >  input->offset = glarray->Ptr - other->Ptr;
> > +
> > +   buffer_range_start[k] = MIN2(buffer_range_start[k],
> start);
> > +   buffer_range_end[k] = MAX2(buffer_range_end[k], start +
> range);
> >  break;
> >   }
> >}
> > @@ -482,29 +509,13 @@ brw_prepare_vertices(struct brw_context *brw)
> >   struct brw_vertex_buffer *buffer = >vb.buffers[j];
> >
> >   /* Named buffer object: Just reference its contents directly.
> */
> > - buffer->offset = (uintptr_t)glarray->Ptr;
> > + buffer->offset = offset;
> >   buffer->stride = glarray->StrideB;
> >   buffer->step_rate = glarray->InstanceDivisor;
> >
> > -uint32_t offset, size;
> > -if (glarray->InstanceDivisor) {
> > -   offset = buffer->offset;
> > -   size = (buffer->stride * ((brw->num_instances /
> > -  glarray->InstanceDivisor) -
> 1) +
> > -   glarray->_ElementSize);
> > -} else {
> > -   if (!brw->vb.index_bounds_valid) {
> > -  offset = 0;
> > -  size = intel_buffer->Base.Size;
> > -   } else {
> > -  offset = buffer->offset + min_index * buffer->stride;
> > -  size = (buffer->stride * (max_index - min_index) +
> > -  glarray->_ElementSize);
> > -   }
> > -}
> > -buffer->bo = intel_bufferobj_buffer(brw, intel_buffer,
> > -   

Re: [Mesa-dev] [RFC 3/7] nir: coverity unitialized pointer read

2016-05-19 Thread Rob Clark
On Thu, May 19, 2016 at 4:42 PM, Matt Turner  wrote:
> On Wed, May 18, 2016 at 8:54 AM, Rob Clark  wrote:
>> From: Rob Clark 
>>
>> Not sure how coverity arrives at the conclusion that we can read comp[j]
>> uninitialized (around line 204), other than not being aware that ncomp is
>> greater than 1 so it won't underflow in the 'if (tex->is_array)' case.
>> ---
>>  src/compiler/nir/nir_lower_tex.c | 6 ++
>>  1 file changed, 6 insertions(+)
>>
>> diff --git a/src/compiler/nir/nir_lower_tex.c 
>> b/src/compiler/nir/nir_lower_tex.c
>> index a080475..c05d48b 100644
>> --- a/src/compiler/nir/nir_lower_tex.c
>> +++ b/src/compiler/nir/nir_lower_tex.c
>> @@ -177,6 +177,12 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, 
>> unsigned sat_mask)
>>/* split src into components: */
>>nir_ssa_def *comp[4];
>>
>> +  /* NOTE: coord_components won't be >4 or <1 but coverity doesn't
>> +   * know this:
>> +   */
>
> I'd drop the comment. git blame will allow us to figure out why the
> assume() is there if needed.
>
>> +  assume(tex->coord_components < ARRAY_SIZE(comp));
>> +  assume(tex->coord_components >= 1);
>
> I think the second one is sufficient, since part of the path involves
> ncomp-- I think that it believes coord_components can be zero so
> subtracting 1 will produce UINT_MAX.
>
> With the comment and the first assume() dropped,

fwiw, first assume() was mostly because I was surprised that coverity
wasn't also upset about coord_components>=4 case.  (Not sure if it
just doesn't bother warning about that sort of thing?  Since I'm not
entirely sure how it could figure that out otherwise.. maybe it really
is that clever..)

BR,
-R

> Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] tilers and out-of-order rendering..

2016-05-19 Thread Rob Clark
So some rendering patterns that I've seen in apps turn out to be
somewhat evil for tiling GPUs.. a couple of cases I've seen:

1) stk has some silliness where it binds an fbo, clears, binds other
fbo clears, binds previous fbo and draws, and so on.  This one is
probably not too hard to just fix in stk.

2) I've seen a render pattern in manhattan where app does a bunch of
texture uploads mid-frame via a pbo (and then generates mipmap levels
for the updated texture, which hits the blit path which changes fb
state and forces a flush).  This one is probably not something that can
be fixed in the app ;-)

There are probably other cases where this comes up which I haven't
noticed yet.  I'm not entirely sure how common the pattern that I see
in manhattan is.

At one point, Eric Anholt mentioned the idea of tracking rendering
cmdstream per render-target, as well as dependency information between
these different sets of cmdstream (if you render to one fbo, then turn
around and sample from it, the rendering needs to happen before the
sampling).  I've been thinking a bit about how this would actually
work, and trying to do some experiments to get an idea about how
useful this would be.

In the manhattan case, via a bit of a hack (to basically no-op the
pipe->blit() to avoid interrupting the tiling pass), I guestimate that
if we were able to re-order the rendering it would gain us something
around 15%.  (This is on ifc6540.. the win might be bigger on
something more memory bandwidth constrained.)

To realize the benefit we would require a bit more cleverness in
pipe->transfer_map to realize that the whole texture contents is being
updated and turn the DISCARD_RANGE into DISCARD_WHOLE_RESOURCE.  The
problem being, I think, that it is only discarding the first mipmap
level so we'd need to realize that in the new buffer the additional
mipmap levels aren't valid.. no idea how that would work.. but in this
case it seems like mostly a smallish (128x128) texture so maybe it is
a win to just memcpy the rest of the old texture data over to the new
texture bo to avoid the stall/flush.

Anyways, the basic idea involves turning pipe_framebuffer_state into a
refcnt'd CSO inside the driver, and use that as the point to track
rendering cmds and dependency info.  (It would be kinda nice if fb
state was already a CSO.. but I guess we can work around that in the
driver using the pipe_framebuffer_state as the hashtable key..
hopefully we can rely on not having garbage data in unused cbuf slots?
 Otherwise we might need a custom hash/equals fxn.)  So something
like:

   /* framebuffer CSO: */
   /* TODO maybe it is more clear to call it fd_batch? */
   struct fd_framebuffer_state {
  struct pipe_reference refcnt;
  struct pipe_framebuffer_state base;
  struct fd_context *ctx;
  struct fd_ringbuffer *ring;
  struct set *dependencies;   /* hashset of dependent
fd_framebuffer_state(s) */
  bool dirty;
   }

When new fb state is set, hashtable lookup and increment the refcnt of
existing CSO if it exists, else create new state object.  And unref
the outgoing CSO.  Whenever there is unflushed rendering to a prsc
(pipe_resource), the prsc would need to also hold a refcnt to the most
recent fb CSO which renders to the prsc to keep the fb CSO live as
long as something depends on it.  Also we need to hold ref's to all
the entries in the dependencies table.

Whenever we emit a reference to another prsc (texture, vbo, index
buffer, etc), we'd have to check if it has pending rendering in a
different fb CSO.  I think for the most part we could replace
OUT_RELOC(fd_bo *) helper with OUT_PRSC(pipe_resource *).. so
something roughly like:

   struct fd_resource {
  struct u_resource base;
  ...
- struct fd_context *pending_ctx;
+ /* hold ref to most recent fb CSO that rendered to us: */
+ struct fd_framebuffer_state *pending_fb;
   }

   static inline void
   OUT_RSC(struct fd_ringbuffer *ring, struct fd_resource *rsc)
   {
   if (rsc->pending_fb && rsc->pending_fb->dirty) {
  /* a bit ugly to chase the current ctx ptr this way, but
   * OUT_RING() is already used in a lot of places that
   * don't have ctx ptr handy..
   */
  struct fd_context *ctx = rsc->pending_fb->ctx;

  /* check for reverse dependency.. if other fb CSO already
   * depends on current fb then we cannot create a loop:
   */
  if (depends_on(rsc->pending_fb, ctx->fb)) {
 fd_context_render(ctx, ctx->fb);
  } else {
 .. add rsc->pending_fb to ctx->fb->dependencies ..
  }
   }
   OUT_RING(ring, rsc->bo);
   }

   static inline void
   OUT_PRSC(struct fd_ringbuffer *ring, struct pipe_resource *prsc)
   {
   OUT_RSC(ring, fd_resource(prsc));
   }



TODO:
1) how would queries work when we start re-ordering rendering?
   I guess we need a query results bo per fb CSO and the query
   needs to hold ref's to all the fb CSO's that 

Re: [Mesa-dev] [PATCH] glxcmds: glXGetFBConfigs, fix screen bounds

2016-05-19 Thread Matt Turner
I suspect you'll want to go ahead and commit that for him.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Fix strerror error code sign

2016-05-19 Thread Ben Widawsky
On Thu, May 19, 2016 at 01:51:08PM -0700, Mark Janes wrote:
> This trivial fix to error-handling corrects the sign of drm error
> codes before passing them to strerror.
> 
> Identified by Coverity: CID1358581

Reviewed-by: Ben Widawsky 

[snip]
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Fix strerror error code sign

2016-05-19 Thread Mark Janes
This trivial fix to error-handling corrects the sign of drm error
codes before passing them to strerror.

Identified by Coverity: CID1358581
---
 src/mesa/drivers/dri/i965/intel_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 1a0541a..076fa24 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -,7 +,7 @@ intel_detect_sseu(struct intel_screen *intelScreen)
 err_out:
intelScreen->subslice_total = -1;
intelScreen->eu_total = -1;
-   _mesa_warning(NULL, "Failed to query GPU properties (%s).\n", 
strerror(ret));
+   _mesa_warning(NULL, "Failed to query GPU properties (%s).\n", 
strerror(-ret));
 }
 
 static bool
-- 
2.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 7/7] nir/validate: assume() that hashtable entry exists

2016-05-19 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 3/7] nir: coverity unitialized pointer read

2016-05-19 Thread Matt Turner
On Wed, May 18, 2016 at 8:54 AM, Rob Clark  wrote:
> From: Rob Clark 
>
> Not sure how coverity arrives at the conclusion that we can read comp[j]
> uninitialized (around line 204), other than not being aware that ncomp is
> greater than 1 so it won't underflow in the 'if (tex->is_array)' case.
> ---
>  src/compiler/nir/nir_lower_tex.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/src/compiler/nir/nir_lower_tex.c 
> b/src/compiler/nir/nir_lower_tex.c
> index a080475..c05d48b 100644
> --- a/src/compiler/nir/nir_lower_tex.c
> +++ b/src/compiler/nir/nir_lower_tex.c
> @@ -177,6 +177,12 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, 
> unsigned sat_mask)
>/* split src into components: */
>nir_ssa_def *comp[4];
>
> +  /* NOTE: coord_components won't be >4 or <1 but coverity doesn't
> +   * know this:
> +   */

I'd drop the comment. git blame will allow us to figure out why the
assume() is there if needed.

> +  assume(tex->coord_components < ARRAY_SIZE(comp));
> +  assume(tex->coord_components >= 1);

I think the second one is sufficient, since part of the path involves
ncomp-- I think that it believes coord_components can be zero so
subtracting 1 will produce UINT_MAX.

With the comment and the first assume() dropped,

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 1/7] nir/glsl_to_nir: quell some uninit_member coverity errors

2016-05-19 Thread Matt Turner
On Wed, May 18, 2016 at 8:54 AM, Rob Clark  wrote:
> From: Rob Clark 
>
> ---
>  src/compiler/nir/glsl_to_nir.cpp | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/src/compiler/nir/glsl_to_nir.cpp 
> b/src/compiler/nir/glsl_to_nir.cpp
> index b25f065..3b487ff 100644
> --- a/src/compiler/nir/glsl_to_nir.cpp
> +++ b/src/compiler/nir/glsl_to_nir.cpp
> @@ -213,6 +213,13 @@ nir_visitor::nir_visitor(nir_shader *shader)
>   _mesa_key_pointer_equal);
> this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>_mesa_key_pointer_equal);
> +   /* initialize these to quell warnings: */

It's good practice for the constructor to initialize the whole object,
so I don't think the comment adds anything.

Acked-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 2/7] nir: coverity sign-extension fix

2016-05-19 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/12] i965, anv: Use NIR FragCoord re-center and y-transform passes.

2016-05-19 Thread Matt Turner
On Wed, May 18, 2016 at 3:00 PM, Kenneth Graunke  wrote:
> This handles gl_FragCoord transformations and other window system vs.
> user FBO coordinate system flipping by multiplying/adding uniform
> values, rather than recompiles.
>
> This is much better because we have no decent way to guess whether
> the application is going to use a shader with the window system FBO
> or a user FBO, much less the drawable height.  This led to a lot of
> recompiles in many applications.

Patches 9-11 are

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/12] i965: Delete dead dFdy flipping code.

2016-05-19 Thread Matt Turner
On Wed, May 18, 2016 at 3:00 PM, Kenneth Graunke  wrote:
> Rob's pass flips dFdy in the opposite case of what I expected,

"Rob's pass" might not mean anything to a future reader. I'd call it
out by name.

> so we always take the negate_value case.  It doesn't really matter.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 24 +---
>  1 file changed, 5 insertions(+), 19 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index ed790b5..53131c4 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -1101,8 +1101,6 @@ void
>  fs_generator::generate_ddy(enum opcode opcode,
> struct brw_reg dst, struct brw_reg src)
>  {
> -   bool negate_value = true;
> -
> if (opcode == FS_OPCODE_DDY_FINE) {
>/* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register
> * Region Restrictions):
> @@ -1152,20 +1150,11 @@ fs_generator::generate_ddy(enum opcode opcode,
>if (unroll_to_simd8) {
>   brw_set_default_exec_size(p, BRW_EXECUTE_8);
>   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
> - if (negate_value) {
> -brw_ADD(p, firsthalf(dst), firsthalf(src1), 
> negate(firsthalf(src0)));
> -brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
> -brw_ADD(p, sechalf(dst), sechalf(src1), negate(sechalf(src0)));
> - } else {
> -brw_ADD(p, firsthalf(dst), firsthalf(src0), 
> negate(firsthalf(src1)));
> -brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
> -brw_ADD(p, sechalf(dst), sechalf(src0), negate(sechalf(src1)));
> - }
> + brw_ADD(p, firsthalf(dst), firsthalf(src1), 
> negate(firsthalf(src0)));
> + brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
> + brw_ADD(p, sechalf(dst), sechalf(src1), negate(sechalf(src0)));
>} else {
> - if (negate_value)
> -brw_ADD(p, dst, src1, negate(src0));
> - else
> -brw_ADD(p, dst, src0, negate(src1));
> + brw_ADD(p, dst, src1, negate(src0));
>}
>brw_pop_insn_state(p);
> } else {
> @@ -1184,10 +1173,7 @@ fs_generator::generate_ddy(enum opcode opcode,
>  BRW_WIDTH_4,
>  BRW_HORIZONTAL_STRIDE_0,
>  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
> -  if (negate_value)
> - brw_ADD(p, dst, src1, negate(src0));
> -  else
> - brw_ADD(p, dst, src0, negate(src1));
> +  brw_ADD(p, dst, src1, negate(src0));

Here, and above, the code reversed src0/src1 for visual clarity
between the two paths. With just one path, let's put src0 before src1.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/12] nir: Add a simple nir_lower_wpos_center() pass for Vulkan drivers.

2016-05-19 Thread Matt Turner
On Wed, May 18, 2016 at 3:00 PM, Kenneth Graunke  wrote:
> nir_lower_wpos_ytransform() is great for OpenGL, which allows
> applications to choose whether their coordinate system's origin is
> upper left/lower left, and whether the pixel center should be on
> integer/half-integer boundaries.
>
> Vulkan, however, has much simpler requirements: the pixel center
> is always half-integer, and the origin is always upper left.  No
> coordinate transform is needed - we just need to add <0.5, 0.5>.
> This means that we can avoid using (and setting up) a uniform.
>
> I thought about adding more options to nir_lower_wpos_ytransform(),
> but making a new pass that never even touched uniforms seemed simpler.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/compiler/Makefile.sources|   1 +
>  src/compiler/nir/nir.h   |   1 +
>  src/compiler/nir/nir_lower_wpos_center.c | 107 
> +++
>  3 files changed, 109 insertions(+)
>  create mode 100644 src/compiler/nir/nir_lower_wpos_center.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index 97f9eb4..b8f2b49 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -212,6 +212,7 @@ NIR_FILES = \
> nir/nir_lower_vars_to_ssa.c \
> nir/nir_lower_var_copies.c \
> nir/nir_lower_vec_to_movs.c \
> +   nir/nir_lower_wpos_center.c \
> nir/nir_lower_wpos_ytransform.c \
> nir/nir_metadata.c \
> nir/nir_move_vec_src_uses_to_dest.c \
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index a21a7bd..78913d3 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2407,6 +2407,7 @@ typedef struct nir_lower_wpos_ytransform_options {
>
>  bool nir_lower_wpos_ytransform(nir_shader *shader,
> const nir_lower_wpos_ytransform_options 
> *options);
> +bool nir_lower_wpos_center(nir_shader *shader);
>
>  typedef struct nir_lower_drawpixels_options {
> int texcoord_state_tokens[5];
> diff --git a/src/compiler/nir/nir_lower_wpos_center.c 
> b/src/compiler/nir/nir_lower_wpos_center.c
> new file mode 100644
> index 000..d66109d
> --- /dev/null
> +++ b/src/compiler/nir/nir_lower_wpos_center.c
> @@ -0,0 +1,107 @@
> +/*
> + * Copyright © 2015 Red Hat
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +#include "program/prog_instruction.h"
> +
> +/**
> + * This pass adds <0.5, 0.5> to all uses of gl_FragCoord.
> + *
> + * Run before nir_lower_io().
> + *
> + * For a more full featured pass, consider using nir_lower_wpos_ytransform(),
> + * which can handle pixel center integer / half integer, and origin lower
> + * left / upper left transformations.
> + *
> + * This simple pass is primarily intended for use by Vulkan drivers on
> + * hardware which provides an integer pixel center.  Vulkan mandates that
> + * the pixel center must be half-integer, and also that the coordinate
> + * system's origin must be upper left.  This means that there's no need
> + * for a uniform - we can always just add a constant.
> + */
> +
> +static void
> +add_half_to_fragcoord(nir_builder *b, nir_intrinsic_instr *intr)
> +{
> +   nir_ssa_def *wpos = &intr->dest.ssa;
> +
> +   assert(intr->dest.is_ssa);
> +
> +   b->cursor = nir_after_instr(&intr->instr);
> +
> +   wpos = nir_fadd(b, wpos, nir_imm_vec4(b, 0.5f, 0.5f, 0.0f, 0.0f));
> +
> +   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, nir_src_for_ssa(wpos),
> +  >instr);
> +}
> +
> +static bool
> +lower_wpos_center_block(nir_builder *b, nir_block *block)
> +{
> +   bool progress = false;
> +
> +   nir_foreach_instr_safe(instr, block) {

Does this need to be _safe? If it does, why 

Re: [Mesa-dev] [PATCH] nir: Handle double-precision in fabs, frsq, fsqrt, fexp2 and flog2

2016-05-19 Thread Matt Turner
On Thu, May 19, 2016 at 3:36 AM, Iago Toral Quiroga  wrote:
> We agreed in the list that it would be better to have these if they were
> easy to implement.
> ---
>  src/compiler/nir/nir_opcodes.py | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
> index 8a3a80f..6dc0c90 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -153,13 +153,13 @@ unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f")
>  unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : 
> -1.0f)")
>  unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
>  unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
> -unop("fabs", tfloat, "fabsf(src0)")
> +unop("fabs", tfloat, "bit_size == 64 ? fabs(src0) : fabsf(src0)")

Seems like we should have had this anyway, since the extension
requires abs(double)?

As idr said in that thread, fnot, fsign, and fsat should have explicit
double support as well.

>  unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
>  unop("frcp", tfloat, "1.0f / src0")
> -unop("frsq", tfloat, "1.0f / sqrtf(src0)")
> -unop("fsqrt", tfloat, "sqrtf(src0)")
> -unop("fexp2", tfloat, "exp2f(src0)")
> -unop("flog2", tfloat, "log2f(src0)")
> +unop("frsq", tfloat, "bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / 
> sqrtf(src0)")
> +unop("fsqrt", tfloat, "bit_size == 64 ? sqrt(src0) : sqrtf(src0)")

And these?

> +unop("fexp2", tfloat, "bit_size == 64 ? exp2(src0) : exp2f(src0)")
> +unop("flog2", tfloat, "bit_size == 64 ? log2(src0) : log2f(src0)")

I don't know why we'd add these. I think this is just confusing the
issue. I'd separate functions that are actually required by the spec
into a separate patch at least.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] nvc0: clear out surfaces bufctx before rebinding everything

2016-05-19 Thread Samuel Pitoiset



On 05/19/2016 12:02 PM, Pierre Moreau wrote:

On 09:28 PM - May 18 2016, Ilia Mirkin wrote:

Otherwise we can end up in a situation where that bin just grows and
grows.

Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index cc5ea5e..2523c20 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -963,6 +963,11 @@ nvc0_validate_suf(struct nvc0_context *nvc0, int s)
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_screen *screen = nvc0->screen;

+   if (s == 5)


This is not specific to this patch, but after seeing various patches with a
special case for `s == 5`, wouldn’t it make sense to have a define for that
index, both to make it a bit clearer to the reader why it is handled
differently, and to avoid typos? Same with having a define for the number of
surfaces (both 3D and CP) and another one for the number of 3D surfaces (or any
other method to avoid looping too many or too few times due to a typo)?


Yep, we should replace those magic numbers with 
nvc0_shader_stage(PIPE_SHADER_COMPUTE) or something.


I plan to do that after images (to avoid conflicts).




+  nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
+   else
+  nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
+
for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
   struct pipe_image_view *view = &nvc0->images[s][i];
   int width, height, depth;
--
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] hash: Add _mesa_HashRemoveLocked() function.

2016-05-19 Thread Matt Turner
Reviewed-by: Timothy Arceri 
Reviewed-by: Brian Paul 
---
I sent these last summer as part of a larger (13) patch series.
The middle of that series was rejected, and I committed the first
4 patches last year. These two, from the end of the series should
still be useful and valid.

 src/mesa/main/hash.c | 19 +++
 src/mesa/main/hash.h |  2 ++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c
index ab1b9e9..85c29cd 100644
--- a/src/mesa/main/hash.c
+++ b/src/mesa/main/hash.c
@@ -328,8 +328,8 @@ _mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, 
void *data)
  * While holding the hash table's lock, searches the entry with the matching
  * key and unlinks it.
  */
-void
-_mesa_HashRemove(struct _mesa_HashTable *table, GLuint key)
+static inline void
+_mesa_HashRemove_unlocked(struct _mesa_HashTable *table, GLuint key)
 {
struct hash_entry *entry;
 
@@ -343,17 +343,28 @@ _mesa_HashRemove(struct _mesa_HashTable *table, GLuint 
key)
   return;
}
 
-   mtx_lock(&table->Mutex);
if (key == DELETED_KEY_VALUE) {
   table->deleted_key_data = NULL;
} else {
   entry = _mesa_hash_table_search(table->ht, uint_key(key));
   _mesa_hash_table_remove(table->ht, entry);
}
-   mtx_unlock(&table->Mutex);
 }
 
 
+void
+_mesa_HashRemoveLocked(struct _mesa_HashTable *table, GLuint key)
+{
+   _mesa_HashRemove_unlocked(table, key);
+}
+
+void
+_mesa_HashRemove(struct _mesa_HashTable *table, GLuint key)
+{
+   mtx_lock(&table->Mutex);
+   _mesa_HashRemove_unlocked(table, key);
+   mtx_unlock(&table->Mutex);
+}
 
 /**
  * Delete all entries in a hash table, but don't delete the table itself.
diff --git a/src/mesa/main/hash.h b/src/mesa/main/hash.h
index da3b997..52a6c5d 100644
--- a/src/mesa/main/hash.h
+++ b/src/mesa/main/hash.h
@@ -54,6 +54,8 @@ extern void *_mesa_HashLookupLocked(struct _mesa_HashTable 
*table, GLuint key);
 extern void _mesa_HashInsertLocked(struct _mesa_HashTable *table,
GLuint key, void *data);
 
+extern void _mesa_HashRemoveLocked(struct _mesa_HashTable *table, GLuint key);
+
 extern void
 _mesa_HashDeleteAll(struct _mesa_HashTable *table,
 void (*callback)(GLuint key, void *data, void *userData),
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] mesa: Replace uses of Shared->Mutex with hash-table mutexes

2016-05-19 Thread Matt Turner
We were locking the Shared->Mutex and then calling functions like
_mesa_HashInsert that do additional per-hash-table locking internally.

Instead just lock each hash-table's mutex and use functions like
_mesa_HashInsertLocked and the new _mesa_HashRemoveLocked.

In order to do this, we need to remove the locking from
_mesa_HashFindFreeKeyBlock since it will always be called with the
per-hash-table lock taken.

Reviewed-by: Timothy Arceri 
Reviewed-by: Brian Paul 
---
 src/mesa/main/arbprogram.c|  7 ++-
 src/mesa/main/atifragshader.c |  6 +-
 src/mesa/main/bufferobj.c | 26 ++
 src/mesa/main/dlist.c |  8 
 src/mesa/main/fbobject.c  | 32 +++-
 src/mesa/main/hash.c  |  4 
 src/mesa/main/samplerobj.c| 23 ++-
 src/mesa/main/shaderapi.c | 10 --
 src/mesa/main/texobj.c| 12 
 9 files changed, 78 insertions(+), 50 deletions(-)

diff --git a/src/mesa/main/arbprogram.c b/src/mesa/main/arbprogram.c
index f474951..3f7acda 100644
--- a/src/mesa/main/arbprogram.c
+++ b/src/mesa/main/arbprogram.c
@@ -200,13 +200,18 @@ _mesa_GenProgramsARB(GLsizei n, GLuint *ids)
if (!ids)
   return;
 
+   _mesa_HashLockMutex(ctx->Shared->Programs);
+
first = _mesa_HashFindFreeKeyBlock(ctx->Shared->Programs, n);
 
/* Insert pointer to dummy program as placeholder */
for (i = 0; i < (GLuint) n; i++) {
-  _mesa_HashInsert(ctx->Shared->Programs, first + i, &_mesa_DummyProgram);
+  _mesa_HashInsertLocked(ctx->Shared->Programs, first + i,
+ &_mesa_DummyProgram);
}
 
+   _mesa_HashUnlockMutex(ctx->Shared->Programs);
+
/* Return the program names */
for (i = 0; i < (GLuint) n; i++) {
   ids[i] = first + i;
diff --git a/src/mesa/main/atifragshader.c b/src/mesa/main/atifragshader.c
index 34f45c6..83a449a 100644
--- a/src/mesa/main/atifragshader.c
+++ b/src/mesa/main/atifragshader.c
@@ -201,11 +201,15 @@ _mesa_GenFragmentShadersATI(GLuint range)
   return 0;
}
 
+   _mesa_HashLockMutex(ctx->Shared->ATIShaders);
+
first = _mesa_HashFindFreeKeyBlock(ctx->Shared->ATIShaders, range);
for (i = 0; i < range; i++) {
-  _mesa_HashInsert(ctx->Shared->ATIShaders, first + i, );
+  _mesa_HashInsertLocked(ctx->Shared->ATIShaders, first + i, );
}
 
+   _mesa_HashUnlockMutex(ctx->Shared->ATIShaders);
+
return first;
 }
 
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index e60a8ea..22073dd 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -1097,8 +1097,11 @@ _mesa_lookup_bufferobj(struct gl_context *ctx, GLuint 
buffer)
 struct gl_buffer_object *
 _mesa_lookup_bufferobj_locked(struct gl_context *ctx, GLuint buffer)
 {
-   return (struct gl_buffer_object *)
-  _mesa_HashLookupLocked(ctx->Shared->BufferObjects, buffer);
+   if (buffer == 0)
+  return NULL;
+   else
+  return (struct gl_buffer_object *)
+ _mesa_HashLookupLocked(ctx->Shared->BufferObjects, buffer);
 }
 
 /**
@@ -1283,10 +1286,11 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
   return;
}
 
-   mtx_lock(&ctx->Shared->Mutex);
+   _mesa_HashLockMutex(ctx->Shared->BufferObjects);
 
for (i = 0; i < n; i++) {
-  struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx, ids[i]);
+  struct gl_buffer_object *bufObj =
+ _mesa_lookup_bufferobj_locked(ctx, ids[i]);
   if (bufObj) {
  struct gl_vertex_array_object *vao = ctx->Array.VAO;
  GLuint j;
@@ -1395,7 +1399,7 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
  }
 
  /* The ID is immediately freed for re-use */
- _mesa_HashRemove(ctx->Shared->BufferObjects, ids[i]);
+ _mesa_HashRemoveLocked(ctx->Shared->BufferObjects, ids[i]);
  /* Make sure we do not run into the classic ABA problem on bind.
   * We don't want to allow re-binding a buffer object that's been
   * "deleted" by glDeleteBuffers().
@@ -1411,7 +1415,7 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
   }
}
 
-   mtx_unlock(&ctx->Shared->Mutex);
+   _mesa_HashUnlockMutex(ctx->Shared->BufferObjects);
 }
 
 
@@ -1445,7 +1449,7 @@ create_buffers(GLsizei n, GLuint *buffers, bool dsa)
/*
 * This must be atomic (generation and allocation of buffer object IDs)
 */
-   mtx_lock(&ctx->Shared->Mutex);
+   _mesa_HashLockMutex(ctx->Shared->BufferObjects);
 
first = _mesa_HashFindFreeKeyBlock(ctx->Shared->BufferObjects, n);
 
@@ -1460,17 +1464,17 @@ create_buffers(GLsizei n, GLuint *buffers, bool dsa)
  buf = ctx->Driver.NewBufferObject(ctx, buffers[i]);
  if (!buf) {
 _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func);
-mtx_unlock(&ctx->Shared->Mutex);
+_mesa_HashUnlockMutex(ctx->Shared->BufferObjects);
 return;
  }
   }
   else
  buf = 
 

Re: [Mesa-dev] [PATCH v2 00/19] swr: update rasterizer

2016-05-19 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak 



On 5/17/16, 5:36 PM, "mesa-dev on behalf of Tim Rowley" 
 wrote:

>Mostly small cleanups this round.
>
>v2:
>  remove definition of offsetof
>  more descriptive commit messages
>  split threads.cpp cygwin/style changes into two commits
>
>Tim Rowley (19):
>  swr: [rasterizer core] use parens to disambiguate operator precedence
>  swr: [rasterizer common] foreground win32 assert dialog
>  swr: [rasterizer common] portable threadviz buckets
>  swr: [rasterizer core] utility function for getenv
>  swr: [rasterizer core] move variable query outside loop
>  swr: [rasterizer core] add dummy code for cygwin build
>  swr: [rasterizer core] code style cleanup
>  swr: [rasterizer core] removed tabs that snuck in
>  swr: [rasterizer common] include cstddef for offsetof
>  swr: [rasterizer common] guard definition of __cdecl/__stdcall
>  swr: [rasterizer jitter] rename MEMCPY function to MEMCOPY
>  swr: [rasterizer] rename _aligned_malloc to AlignedMalloc
>  swr: [rasterizer common] add OSX to unix portability sections
>  swr: [rasterizer core] clang override for _mm_undefined*
>  swr: [rasterizer core] move MultisampleTrait static from header to cpp
>  swr: [rasterizer jitter] add instancing to non-gather fetch path
>  swr: [rasterizer core] apply KNOB_TOSS_DRAW to more functions
>  swr: [rasterizer jitter] fix assert in AVX implementation of MASKLOADD
>  swr: [rasterizer] utility functions for shared libs
>
> src/gallium/drivers/swr/rasterizer/common/os.h | 33 -
> .../swr/rasterizer/common/rdtsc_buckets.cpp| 13 +++-
> .../drivers/swr/rasterizer/common/swr_assert.cpp   |  2 +-
> src/gallium/drivers/swr/rasterizer/core/api.cpp| 36 +++---
> src/gallium/drivers/swr/rasterizer/core/arena.h|  4 +-
> .../drivers/swr/rasterizer/core/format_types.h |  2 +
> .../drivers/swr/rasterizer/core/frontend.cpp   | 13 ++--
> .../drivers/swr/rasterizer/core/multisample.cpp|  5 +-
> .../drivers/swr/rasterizer/core/multisample.h  |  4 +-
> src/gallium/drivers/swr/rasterizer/core/pa.h   |  8 +--
> .../drivers/swr/rasterizer/core/ringbuffer.h   |  4 +-
> .../drivers/swr/rasterizer/core/threads.cpp| 60 ++--
> .../drivers/swr/rasterizer/core/tilemgr.cpp|  2 +-
> src/gallium/drivers/swr/rasterizer/core/tilemgr.h  |  4 +-
> src/gallium/drivers/swr/rasterizer/core/utils.h| 80 +-
> .../drivers/swr/rasterizer/jitter/builder_misc.cpp |  9 ++-
> .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 42 ++--
> .../drivers/swr/rasterizer/jitter/jit_api.h|  1 +
> .../jitter/scripts/gen_llvm_ir_macros.py   |  2 +-
> src/gallium/drivers/swr/swr_screen.cpp |  8 +--
> 20 files changed, 262 insertions(+), 70 deletions(-)
>
>-- 
>1.9.1
>
>___
>mesa-dev mailing list
>mesa-dev@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] docs: add swr to GL3.txt

2016-05-19 Thread Ian Romanick
Looks good to me.

Reviewed-by: Ian Romanick 

On 05/17/2016 03:46 PM, Tim Rowley wrote:
> ---
>  docs/GL3.txt | 82 
> ++--
>  1 file changed, 41 insertions(+), 41 deletions(-)
> 
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index 921a529..7e86f5e 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -33,7 +33,7 @@ are exposed in the 3.0 context as extensions.
>  Feature Status
>  --- 
> 
>  
> -GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
> softpipe
> +GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
> softpipe, swr
>  
>glBindFragDataLocation, glGetFragDataLocation DONE
>GL_NV_conditional_render (Conditional rendering)  DONE ()
> @@ -60,12 +60,12 @@ GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, 
> radeonsi, llvmpipe, soft
>glVertexAttribI commands  DONE
>Depth format cube texturesDONE ()
>GLX_ARB_create_context (GLX 1.4 is required)  DONE
> -  Multisample anti-aliasing DONE (llvmpipe (*), 
> softpipe (*))
> +  Multisample anti-aliasing DONE (llvmpipe (*), 
> softpipe (*), swr (*))
>  
> -(*) llvmpipe and softpipe have fake Multisample anti-aliasing support
> +(*) llvmpipe, softpipe, and swr have fake Multisample anti-aliasing support
>  
>  
> -GL 3.1, GLSL 1.40 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
> softpipe
> +GL 3.1, GLSL 1.40 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
> softpipe, swr
>  
>Forward compatible context support/deprecations   DONE ()
>GL_ARB_draw_instanced (Instanced drawing) DONE ()
> @@ -82,35 +82,35 @@ GL 3.2, GLSL 1.50 --- all DONE: i965, nv50, nvc0, r600, 
> radeonsi, llvmpipe, soft
>  
>Core/compatibility profiles   DONE
>Geometry shaders  DONE ()
> -  GL_ARB_vertex_array_bgra (BGRA vertex order)  DONE ()
> -  GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE ()
> -  GL_ARB_fragment_coord_conventions (Frag shader coord) DONE ()
> -  GL_ARB_provoking_vertex (Provoking vertex)DONE ()
> -  GL_ARB_seamless_cube_map (Seamless cubemaps)  DONE ()
> -  GL_ARB_texture_multisample (Multisample textures) DONE ()
> -  GL_ARB_depth_clamp (Frag depth clamp) DONE ()
> -  GL_ARB_sync (Fence objects)   DONE ()
> +  GL_ARB_vertex_array_bgra (BGRA vertex order)  DONE (swr)
> +  GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE (swr)
> +  GL_ARB_fragment_coord_conventions (Frag shader coord) DONE (swr)
> +  GL_ARB_provoking_vertex (Provoking vertex)DONE (swr)
> +  GL_ARB_seamless_cube_map (Seamless cubemaps)  DONE (swr)
> +  GL_ARB_texture_multisample (Multisample textures) DONE (swr)
> +  GL_ARB_depth_clamp (Frag depth clamp) DONE (swr)
> +  GL_ARB_sync (Fence objects)   DONE (swr)
>GLX_ARB_create_context_profileDONE
>  
>  
>  GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
> softpipe
>  
> -  GL_ARB_blend_func_extendedDONE ()
> +  GL_ARB_blend_func_extendedDONE (swr)
>GL_ARB_explicit_attrib_location   DONE (all drivers 
> that support GLSL)
> -  GL_ARB_occlusion_query2   DONE ()
> +  GL_ARB_occlusion_query2   DONE (swr)
>GL_ARB_sampler_objectsDONE (all drivers)
> -  GL_ARB_shader_bit_encodingDONE ()
> -  GL_ARB_texture_rgb10_a2ui DONE ()
> -  GL_ARB_texture_swizzleDONE ()
> -  GL_ARB_timer_queryDONE ()
> -  GL_ARB_instanced_arrays   DONE ()
> -  GL_ARB_vertex_type_2_10_10_10_rev DONE ()
> +  GL_ARB_shader_bit_encodingDONE (swr)
> +  GL_ARB_texture_rgb10_a2ui DONE (swr)
> +  GL_ARB_texture_swizzleDONE (swr)
> +  GL_ARB_timer_queryDONE (swr)
> +  GL_ARB_instanced_arrays   DONE (swr)
> +  GL_ARB_vertex_type_2_10_10_10_rev DONE (swr)
>  
>  
>  GL 4.0, GLSL 4.00 --- all DONE: nvc0, r600, radeonsi
>  
> -  GL_ARB_draw_buffers_blend DONE (i965, nv50, 
> llvmpipe, softpipe)
> -  GL_ARB_draw_indirect  DONE (i965, 
> llvmpipe, softpipe)
> +  GL_ARB_draw_buffers_blend   

[Mesa-dev] [PATCH 8/8] drm/amdgpu/gfx8: Enable GFX PG on CZ

2016-05-19 Thread Alex Deucher
From: Tom St Denis 

Based on Alex's patches this enables GFX PG on CZ.

Tested with xonotic-glx/glxgears/supertuxkart and idle desktop.
Also read back registers via umr for verification that the bits
are truly enabled.

Signed-off-by: Tom St Denis 
Reviewed-by: Alex Deucher 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vi.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 2c88d0b..2f5c8aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1207,7 +1207,15 @@ static int vi_common_early_init(void *handle)
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_SDMA_MGCG |
AMD_CG_SUPPORT_SDMA_LS;
+   /* rev0 hardware doesn't support PG */
adev->pg_flags = 0;
+   if (adev->rev_id != 0x00)
+   adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+   AMD_PG_SUPPORT_GFX_SMG |
+   AMD_PG_SUPPORT_GFX_DMG |
+   AMD_PG_SUPPORT_CP |
+   AMD_PG_SUPPORT_RLC_SMU_HS |
+   AMD_PG_SUPPORT_GFX_PIPELINE;
adev->external_rev_id = adev->rev_id + 0x1;
break;
case CHIP_STONEY:
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/8] drm/amdgpu/gfx8: add state setup for CZ/ST GFX power gating

2016-05-19 Thread Alex Deucher
This sets up the CP jump table and GDS buffer and sets the
PG state registers.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 133 --
 1 file changed, 128 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index f19bab6..7fcde08 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1129,6 +1129,71 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device 
*adev,
buffer[count++] = cpu_to_le32(0);
 }
 
+static void cz_init_cp_jump_table(struct amdgpu_device *adev)
+{
+   const __le32 *fw_data;
+   volatile u32 *dst_ptr;
+   int me, i, max_me = 4;
+   u32 bo_offset = 0;
+   u32 table_offset, table_size;
+
+   if (adev->asic_type == CHIP_CARRIZO)
+   max_me = 5;
+
+   /* write the cp table buffer */
+   dst_ptr = adev->gfx.rlc.cp_table_ptr;
+   for (me = 0; me < max_me; me++) {
+   if (me == 0) {
+   const struct gfx_firmware_header_v1_0 *hdr =
+   (const struct gfx_firmware_header_v1_0 
*)adev->gfx.ce_fw->data;
+   fw_data = (const __le32 *)
+   (adev->gfx.ce_fw->data +
+
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+   table_offset = le32_to_cpu(hdr->jt_offset);
+   table_size = le32_to_cpu(hdr->jt_size);
+   } else if (me == 1) {
+   const struct gfx_firmware_header_v1_0 *hdr =
+   (const struct gfx_firmware_header_v1_0 
*)adev->gfx.pfp_fw->data;
+   fw_data = (const __le32 *)
+   (adev->gfx.pfp_fw->data +
+
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+   table_offset = le32_to_cpu(hdr->jt_offset);
+   table_size = le32_to_cpu(hdr->jt_size);
+   } else if (me == 2) {
+   const struct gfx_firmware_header_v1_0 *hdr =
+   (const struct gfx_firmware_header_v1_0 
*)adev->gfx.me_fw->data;
+   fw_data = (const __le32 *)
+   (adev->gfx.me_fw->data +
+
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+   table_offset = le32_to_cpu(hdr->jt_offset);
+   table_size = le32_to_cpu(hdr->jt_size);
+   } else if (me == 3) {
+   const struct gfx_firmware_header_v1_0 *hdr =
+   (const struct gfx_firmware_header_v1_0 
*)adev->gfx.mec_fw->data;
+   fw_data = (const __le32 *)
+   (adev->gfx.mec_fw->data +
+
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+   table_offset = le32_to_cpu(hdr->jt_offset);
+   table_size = le32_to_cpu(hdr->jt_size);
+   } else  if (me == 4) {
+   const struct gfx_firmware_header_v1_0 *hdr =
+   (const struct gfx_firmware_header_v1_0 
*)adev->gfx.mec2_fw->data;
+   fw_data = (const __le32 *)
+   (adev->gfx.mec2_fw->data +
+
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+   table_offset = le32_to_cpu(hdr->jt_offset);
+   table_size = le32_to_cpu(hdr->jt_size);
+   }
+
+   for (i = 0; i < table_size; i ++) {
+   dst_ptr[bo_offset + i] =
+   cpu_to_le32(le32_to_cpu(fw_data[table_offset + 
i]));
+   }
+
+   bo_offset += table_size;
+   }
+}
+
 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 {
int r;
@@ -1144,6 +1209,18 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
adev->gfx.rlc.clear_state_obj = NULL;
}
+
+   /* jump table block */
+   if (adev->gfx.rlc.cp_table_obj) {
+   r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+   if (unlikely(r != 0))
+   dev_warn(adev->dev, "(%d) reserve RLC cp table bo 
failed\n", r);
+   amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
+   amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+   amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
+   adev->gfx.rlc.cp_table_obj = NULL;
+   }
 }
 
 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
@@ -1200,6 +1277,46 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

[Mesa-dev] [PATCH 5/8] drm/amdgpu: add new GFX powergating types

2016-05-19 Thread Alex Deucher
Add some new GFX powergating flags.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/include/amd_shared.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/amd_shared.h 
b/drivers/gpu/drm/amd/include/amd_shared.h
index 6080951..147b2eb 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -120,6 +120,8 @@ enum amd_powergating_state {
 #define AMD_PG_SUPPORT_SDMA(1 << 8)
 #define AMD_PG_SUPPORT_ACP (1 << 9)
 #define AMD_PG_SUPPORT_SAMU(1 << 10)
+#define AMD_PG_SUPPORT_GFX_QUICK_MG(1 << 11)
+#define AMD_PG_SUPPORT_GFX_PIPELINE(1 << 12)
 
 enum amd_pm_state_type {
/* not used for dpm */
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/8] drm/amdgpu/gfx7: expand cp jt size to handle GDS as well

2016-05-19 Thread Alex Deucher
The size needs to handle the CP JT and GDS.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 7f18a53..d58425e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3205,7 +3205,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
}
}
adev->gfx.rlc.cs_data = ci_cs_data;
-   adev->gfx.rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+   adev->gfx.rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; /* CP JT */
+   adev->gfx.rlc.cp_table_size += 64 * 1024; /* GDS */
 
src_ptr = adev->gfx.rlc.reg_list;
dws = adev->gfx.rlc.reg_list_size;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/8] drm/amdgpu/gfx8: clean up polaris11 PG enable

2016-05-19 Thread Alex Deucher
Fix the logic for enabling/disabling.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 0508cef..494104e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5365,12 +5365,20 @@ static int gfx_v8_0_set_powergating_state(void *handle,
gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, 
false);
break;
case CHIP_POLARIS11:
-   if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
-   gfx_v8_0_enable_gfx_static_mg_power_gating(adev, 
enable);
-   else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
-   gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, 
enable);
+   if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+   gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
+   else
+   gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+   if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+   gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+   else
+   gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, 
false);
+
+   if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
+   polaris11_enable_gfx_quick_mg_power_gating(adev, true);
else
-   polaris11_enable_gfx_quick_mg_power_gating(adev, 
enable);
+   polaris11_enable_gfx_quick_mg_power_gating(adev, false);
break;
default:
break;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/8] drm/amdgpu/gfx8: add powergating support for CZ/ST

2016-05-19 Thread Alex Deucher
This implements powergating support for CZ/ST asics.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 132 --
 1 file changed, 126 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 36ec41f..0508cef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3810,6 +3810,53 @@ static void gfx_v8_0_init_power_gating(struct 
amdgpu_device *adev)
}
 }
 
+static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
+   bool enable)
+{
+   u32 data, orig;
+
+   orig = data = RREG32(mmRLC_PG_CNTL);
+
+   if (enable)
+   data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+   else
+   data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+
+   if (orig != data)
+   WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
+ bool enable)
+{
+   u32 data, orig;
+
+   orig = data = RREG32(mmRLC_PG_CNTL);
+
+   if (enable)
+   data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+   else
+   data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+
+   if (orig != data)
+   WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
+{
+   u32 data, orig;
+
+   orig = data = RREG32(mmRLC_PG_CNTL);
+
+   if (enable)
+   data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+   else
+   data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+
+   if (orig != data)
+   WREG32(mmRLC_PG_CNTL, data);
+}
+
 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
 {
if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
@@ -3827,6 +3874,17 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
WREG32(mmRLC_JUMP_TABLE_RESTORE, 
adev->gfx.rlc.cp_table_gpu_addr >> 8);
gfx_v8_0_init_power_gating(adev);
WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, 
adev->gfx.cu_info.ao_cu_mask);
+   if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+   cz_enable_sck_slow_down_on_power_up(adev, true);
+   cz_enable_sck_slow_down_on_power_down(adev, 
true);
+   } else {
+   cz_enable_sck_slow_down_on_power_up(adev, 
false);
+   cz_enable_sck_slow_down_on_power_down(adev, 
false);
+   }
+   if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+   cz_enable_cp_power_gating(adev, true);
+   else
+   cz_enable_cp_power_gating(adev, false);
} else if (adev->asic_type == CHIP_POLARIS11) {
gfx_v8_0_init_power_gating(adev);
}
@@ -5232,25 +5290,87 @@ static void 
polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *ade
}
 }
 
+static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+   u32 data, orig;
+
+   orig = data = RREG32(mmRLC_PG_CNTL);
+
+   if (enable)
+   data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+   else
+   data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+   if (orig != data)
+   WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
+   bool enable)
+{
+   u32 data, orig;
+
+   orig = data = RREG32(mmRLC_PG_CNTL);
+
+   if (enable)
+   data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+   else
+   data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+
+   if (orig != data)
+   WREG32(mmRLC_PG_CNTL, data);
+
+   /* Read any GFX register to wake up GFX. */
+   if (!enable)
+   data = RREG32(mmDB_RENDER_CONTROL);
+}
+
+static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+   if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
+   cz_enable_gfx_cg_power_gating(adev, true);
+   if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+   cz_enable_gfx_pipeline_power_gating(adev, true);
+   } else {
+   cz_enable_gfx_cg_power_gating(adev, false);
+   cz_enable_gfx_pipeline_power_gating(adev, false);
+   }
+}
+
 static int gfx_v8_0_set_powergating_state(void *handle,
  enum 

[Mesa-dev] [PATCH 2/8] drm/radeon/gfx7: expand cp jt size to handle GDS as well

2016-05-19 Thread Alex Deucher
The size needs to handle the CP JT and GDS.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/radeon/cik.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index ba192a3..5c88c1c 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -8354,7 +8354,8 @@ static int cik_startup(struct radeon_device *rdev)
}
}
rdev->rlc.cs_data = ci_cs_data;
-   rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+   rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; /* CP JT */
+   rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
r = sumo_rlc_init(rdev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/8] drm/amdgpu/gfx8: rename some pg functions

2016-05-19 Thread Alex Deucher
So they can be shared with other asics.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 7fcde08..36ec41f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5162,15 +5162,17 @@ static int gfx_v8_0_late_init(void *handle)
return 0;
 }
 
-static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device 
*adev,
-   bool enable)
+static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device 
*adev,
+  bool enable)
 {
uint32_t data, temp;
 
-   /* Send msg to SMU via Powerplay */
-   amdgpu_set_powergating_state(adev,
-   AMD_IP_BLOCK_TYPE_SMC,
-   enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
+   if (adev->asic_type == CHIP_POLARIS11)
+   /* Send msg to SMU via Powerplay */
+   amdgpu_set_powergating_state(adev,
+AMD_IP_BLOCK_TYPE_SMC,
+enable ?
+AMD_PG_STATE_GATE : 
AMD_PG_STATE_UNGATE);
 
if (enable) {
/* Enable static MGPG */
@@ -5188,8 +5190,8 @@ static void 
polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *ad
}
 }
 
-static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device 
*adev,
-   bool enable)
+static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device 
*adev,
+   bool enable)
 {
uint32_t data, temp;
 
@@ -5241,10 +5243,10 @@ static int gfx_v8_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_POLARIS11:
if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
-   polaris11_enable_gfx_static_mg_power_gating(adev,
+   gfx_v8_0_enable_gfx_static_mg_power_gating(adev,
state == AMD_PG_STATE_GATE ? true : 
false);
else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
-   polaris11_enable_gfx_dynamic_mg_power_gating(adev,
+   gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev,
state == AMD_PG_STATE_GATE ? true : 
false);
else
polaris11_enable_gfx_quick_mg_power_gating(adev,
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/8] Add GFX powergating support for CZ

2016-05-19 Thread Alex Deucher
This patch set adds powergating support for the gfx block
on Carrizo.  Also fixes a few issues with powergating setup
on older asics.  Powergating improves idle powersaving.

Alex Deucher (7):
  drm/amdgpu/gfx7: expand cp jt size to handle GDS as well
  drm/radeon/gfx7: expand cp jt size to handle GDS as well
  drm/amdgpu/gfx8: add state setup for CZ/ST GFX power gating
  drm/amdgpu/gfx8: rename some pg functions
  drm/amdgpu: add new GFX powergating types
  drm/amdgpu/gfx8: add powergating support for CZ/ST
  drm/amdgpu/gfx8: clean up polaris11 PG enable

Tom St Denis (1):
  drm/amdgpu/gfx8: Enable GFX PG on CZ

 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c|   3 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 295 ---
 drivers/gpu/drm/amd/amdgpu/vi.c  |   8 +
 drivers/gpu/drm/amd/include/amd_shared.h |   2 +
 drivers/gpu/drm/radeon/cik.c |   3 +-
 5 files changed, 288 insertions(+), 23 deletions(-)

-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: Add missing handling of U64/S64 in inlines

2016-05-19 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Thu, May 19, 2016 at 2:13 PM, Pierre Moreau  wrote:
> Signed-off-by: Pierre Moreau 
> ---
>
> U64/S64 support is missing in other places of codegen (like in
> nv50_ir_peephole.cpp for example), however its absence will result in code not
> being as optimised as it could have been. Adding that support is not as
> straightforward as this patch, and will need thorough testing to avoid any
> regressions, therefore I’m only sending this patch for now.
>
>  src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> index 4c5de2e..4cb53ab 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> @@ -126,7 +126,7 @@ static inline bool isFloatType(DataType ty)
>
>  static inline bool isSignedIntType(DataType ty)
>  {
> -   return (ty == TYPE_S8 || ty == TYPE_S16 || ty == TYPE_S32);
> +   return (ty == TYPE_S8 || ty == TYPE_S16 || ty == TYPE_S32 || ty == 
> TYPE_S64);
>  }
>
>  static inline bool isSignedType(DataType ty)
> @@ -136,6 +136,7 @@ static inline bool isSignedType(DataType ty)
> case TYPE_U8:
> case TYPE_U16:
> case TYPE_U32:
> +   case TYPE_U64:
> case TYPE_B96:
> case TYPE_B128:
>return false;
> @@ -147,6 +148,7 @@ static inline bool isSignedType(DataType ty)
>  static inline DataType intTypeToSigned(DataType ty)
>  {
> switch (ty) {
> +   case TYPE_U64: return TYPE_S64;
> case TYPE_U32: return TYPE_S32;
> case TYPE_U16: return TYPE_S16;
> case TYPE_U8: return TYPE_S8;
> --
> 2.8.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: Add missing handling of U64/S64 in inlines

2016-05-19 Thread Pierre Moreau
Signed-off-by: Pierre Moreau 
---

U64/S64 support is missing in other places of codegen (like in
nv50_ir_peephole.cpp for example), however its absence will result in code not
being as optimised as it could have been. Adding that support is not as
straightforward as this patch, and will need thorough testing to avoid any
regressions, therefore I’m only sending this patch for now.

 src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
index 4c5de2e..4cb53ab 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
@@ -126,7 +126,7 @@ static inline bool isFloatType(DataType ty)
 
 static inline bool isSignedIntType(DataType ty)
 {
-   return (ty == TYPE_S8 || ty == TYPE_S16 || ty == TYPE_S32);
+   return (ty == TYPE_S8 || ty == TYPE_S16 || ty == TYPE_S32 || ty == 
TYPE_S64);
 }
 
 static inline bool isSignedType(DataType ty)
@@ -136,6 +136,7 @@ static inline bool isSignedType(DataType ty)
case TYPE_U8:
case TYPE_U16:
case TYPE_U32:
+   case TYPE_U64:
case TYPE_B96:
case TYPE_B128:
   return false;
@@ -147,6 +148,7 @@ static inline bool isSignedType(DataType ty)
 static inline DataType intTypeToSigned(DataType ty)
 {
switch (ty) {
+   case TYPE_U64: return TYPE_S64;
case TYPE_U32: return TYPE_S32;
case TYPE_U16: return TYPE_S16;
case TYPE_U8: return TYPE_S8;
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] docs: add swr to GL3.txt

2016-05-19 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak 




On 5/17/16, 5:46 PM, "mesa-dev on behalf of Tim Rowley" 
 wrote:

>---
> docs/GL3.txt | 82 ++--
> 1 file changed, 41 insertions(+), 41 deletions(-)
>
>diff --git a/docs/GL3.txt b/docs/GL3.txt
>index 921a529..7e86f5e 100644
>--- a/docs/GL3.txt
>+++ b/docs/GL3.txt
>@@ -33,7 +33,7 @@ are exposed in the 3.0 context as extensions.
> Feature Status
> --- 
> 
> 
>-GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
>softpipe
>+GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
>softpipe, swr
> 
>   glBindFragDataLocation, glGetFragDataLocation DONE
>   GL_NV_conditional_render (Conditional rendering)  DONE ()
>@@ -60,12 +60,12 @@ GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, 
>radeonsi, llvmpipe, soft
>   glVertexAttribI commands  DONE
>   Depth format cube texturesDONE ()
>   GLX_ARB_create_context (GLX 1.4 is required)  DONE
>-  Multisample anti-aliasing DONE (llvmpipe (*), 
>softpipe (*))
>+  Multisample anti-aliasing DONE (llvmpipe (*), 
>softpipe (*), swr (*))
> 
>-(*) llvmpipe and softpipe have fake Multisample anti-aliasing support
>+(*) llvmpipe, softpipe, and swr have fake Multisample anti-aliasing support
> 
> 
>-GL 3.1, GLSL 1.40 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
>softpipe
>+GL 3.1, GLSL 1.40 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
>softpipe, swr
> 
>   Forward compatible context support/deprecations   DONE ()
>   GL_ARB_draw_instanced (Instanced drawing) DONE ()
>@@ -82,35 +82,35 @@ GL 3.2, GLSL 1.50 --- all DONE: i965, nv50, nvc0, r600, 
>radeonsi, llvmpipe, soft
> 
>   Core/compatibility profiles   DONE
>   Geometry shaders  DONE ()
>-  GL_ARB_vertex_array_bgra (BGRA vertex order)  DONE ()
>-  GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE ()
>-  GL_ARB_fragment_coord_conventions (Frag shader coord) DONE ()
>-  GL_ARB_provoking_vertex (Provoking vertex)DONE ()
>-  GL_ARB_seamless_cube_map (Seamless cubemaps)  DONE ()
>-  GL_ARB_texture_multisample (Multisample textures) DONE ()
>-  GL_ARB_depth_clamp (Frag depth clamp) DONE ()
>-  GL_ARB_sync (Fence objects)   DONE ()
>+  GL_ARB_vertex_array_bgra (BGRA vertex order)  DONE (swr)
>+  GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE (swr)
>+  GL_ARB_fragment_coord_conventions (Frag shader coord) DONE (swr)
>+  GL_ARB_provoking_vertex (Provoking vertex)DONE (swr)
>+  GL_ARB_seamless_cube_map (Seamless cubemaps)  DONE (swr)
>+  GL_ARB_texture_multisample (Multisample textures) DONE (swr)
>+  GL_ARB_depth_clamp (Frag depth clamp) DONE (swr)
>+  GL_ARB_sync (Fence objects)   DONE (swr)
>   GLX_ARB_create_context_profileDONE
> 
> 
> GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
> softpipe
> 
>-  GL_ARB_blend_func_extendedDONE ()
>+  GL_ARB_blend_func_extendedDONE (swr)
>   GL_ARB_explicit_attrib_location   DONE (all drivers 
> that support GLSL)
>-  GL_ARB_occlusion_query2   DONE ()
>+  GL_ARB_occlusion_query2   DONE (swr)
>   GL_ARB_sampler_objectsDONE (all drivers)
>-  GL_ARB_shader_bit_encodingDONE ()
>-  GL_ARB_texture_rgb10_a2ui DONE ()
>-  GL_ARB_texture_swizzleDONE ()
>-  GL_ARB_timer_queryDONE ()
>-  GL_ARB_instanced_arrays   DONE ()
>-  GL_ARB_vertex_type_2_10_10_10_rev DONE ()
>+  GL_ARB_shader_bit_encodingDONE (swr)
>+  GL_ARB_texture_rgb10_a2ui DONE (swr)
>+  GL_ARB_texture_swizzleDONE (swr)
>+  GL_ARB_timer_queryDONE (swr)
>+  GL_ARB_instanced_arrays   DONE (swr)
>+  GL_ARB_vertex_type_2_10_10_10_rev DONE (swr)
> 
> 
> GL 4.0, GLSL 4.00 --- all DONE: nvc0, r600, radeonsi
> 
>-  GL_ARB_draw_buffers_blend DONE (i965, nv50, 
>llvmpipe, softpipe)
>-  GL_ARB_draw_indirect  DONE (i965, llvmpipe, 
>softpipe)
>+  GL_ARB_draw_buffers_blend

Re: [Mesa-dev] [PATCH 01/13] vbo: Declare the index range invalid for DrawIndirect

2016-05-19 Thread Ian Romanick
On 05/19/2016 12:20 AM, Jason Ekstrand wrote:
> Right now, we're just setting the range to [0, MAX_UINT32] which, while
> correct, isn't helpful.  With DrawIndirect, you can't really know what the
> actual range is so we may as well flag it as being an invalid range.  This
> is what we do for draws with index buffer which is similar (the indices
> aren't statically known) if a bit simpler.
> 
> Cc: "10.2" 

I don't think this is the stable tagging that you intended. :)  These
are definitely candidates for as far back as Emil is cherry-picking
patches.  Maybe "11.0 11.1 11.2" ?

In any case, as discussed on IRC, this patch is

Reviewed-by: Ian Romanick 

> ---
>  src/mesa/vbo/vbo_context.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
> index 9f807a1..ae5d265 100644
> --- a/src/mesa/vbo/vbo_context.c
> +++ b/src/mesa/vbo/vbo_context.c
> @@ -170,7 +170,7 @@ vbo_draw_indirect_prims(struct gl_context *ctx,
> }
>  
> vbo->draw_prims(ctx, prim, draw_count,
> -   ib, GL_TRUE, 0, ~0,
> +   ib, false, ~0, ~0,

I'd be in favor of a follow-up patch that converts this GLboolean to
bool everywhere. :)

> NULL, 0,
> ctx->DrawIndirectBuffer);
>  

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/13] glsl/linker: Add a helper variable for compiler options

2016-05-19 Thread Ian Romanick
This patch is

Reviewed-by: Ian Romanick 

On 05/19/2016 12:21 AM, Jason Ekstrand wrote:
> ---
>  src/compiler/glsl/linker.cpp | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index 70c6317..71a71df 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -4875,10 +4875,13 @@ link_shaders(struct gl_context *ctx, struct 
> gl_shader_program *prog)
>if (prog->_LinkedShaders[i] == NULL)
>continue;
>  
> -  if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks)
> +  const struct gl_shader_compiler_options *options =
> + &ctx->Const.ShaderCompilerOptions[i];
> +
> +  if (options->LowerBufferInterfaceBlocks)
>   lower_ubo_reference(prog->_LinkedShaders[i]);
>  
> -  if (ctx->Const.ShaderCompilerOptions[i].LowerShaderSharedVariables)
> +  if (options->LowerShaderSharedVariables)
>   lower_shared_reference(prog->_LinkedShaders[i],
>  >Comp.SharedSize);
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/13] glsl: Add an option to clamp block indices when lowering UBO/SSBOs

2016-05-19 Thread Ian Romanick
So... what did we decide for arrays of atomic counters?  Do we need an
extra pass for that or ... ?

Also... how does this handle the possibly unsized (actually
draw-time-sized) array at the end of an SSBO?

For UBOs, I think this patch is definitely sufficient, and I think it
improves things quite a lot for SSBOs.  We may need some more, but this
patch is

Reviewed-by: Ian Romanick 

On 05/19/2016 12:21 AM, Jason Ekstrand wrote:
> This prevents array overflow when the block is actually an array of UBOs or
> SSBOs.  On some hardware such as i965, such overflows can cause GPU hangs.
> ---
>  src/compiler/glsl/ir_optimization.h   |  2 +-
>  src/compiler/glsl/linker.cpp  |  3 ++-
>  src/compiler/glsl/lower_ubo_reference.cpp | 36 
> +++
>  src/mesa/drivers/dri/i965/brw_compiler.c  |  1 +
>  src/mesa/main/mtypes.h|  3 +++
>  5 files changed, 39 insertions(+), 6 deletions(-)
> 
> diff --git a/src/compiler/glsl/ir_optimization.h 
> b/src/compiler/glsl/ir_optimization.h
> index 5fc2740..4afa37e 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -123,7 +123,7 @@ bool lower_clip_distance(gl_shader *shader);
>  void lower_output_reads(unsigned stage, exec_list *instructions);
>  bool lower_packing_builtins(exec_list *instructions, int op_mask);
>  void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
> -void lower_ubo_reference(struct gl_shader *shader);
> +void lower_ubo_reference(struct gl_shader *shader, bool clamp_block_indices);
>  void lower_packed_varyings(void *mem_ctx,
> unsigned locations_used, ir_variable_mode mode,
> unsigned gs_input_vertices, gl_shader *shader,
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index 71a71df..07c8263 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -4879,7 +4879,8 @@ link_shaders(struct gl_context *ctx, struct 
> gl_shader_program *prog)
>   >Const.ShaderCompilerOptions[i];
>  
>if (options->LowerBufferInterfaceBlocks)
> - lower_ubo_reference(prog->_LinkedShaders[i]);
> + lower_ubo_reference(prog->_LinkedShaders[i],
> + options->ClampBlockIndicesToArrayBounds);
>  
>if (options->LowerShaderSharedVariables)
>   lower_shared_reference(prog->_LinkedShaders[i],
> diff --git a/src/compiler/glsl/lower_ubo_reference.cpp 
> b/src/compiler/glsl/lower_ubo_reference.cpp
> index 1a0140f..749deed 100644
> --- a/src/compiler/glsl/lower_ubo_reference.cpp
> +++ b/src/compiler/glsl/lower_ubo_reference.cpp
> @@ -44,8 +44,10 @@ namespace {
>  class lower_ubo_reference_visitor :
>public lower_buffer_access::lower_buffer_access {
>  public:
> -   lower_ubo_reference_visitor(struct gl_shader *shader)
> -   : shader(shader), struct_field(NULL), variable(NULL)
> +   lower_ubo_reference_visitor(struct gl_shader *shader,
> +   bool clamp_block_indices)
> +   : shader(shader), clamp_block_indices(clamp_block_indices),
> + struct_field(NULL), variable(NULL)
> {
> }
>  
> @@ -104,6 +106,7 @@ public:
> ir_visitor_status visit_enter(ir_call *ir);
>  
> struct gl_shader *shader;
> +   bool clamp_block_indices;
> struct gl_uniform_buffer_variable *ubo_var;
> const struct glsl_struct_field *struct_field;
> ir_variable *variable;
> @@ -242,6 +245,26 @@ interface_field_name(void *mem_ctx, char *base_name, 
> ir_rvalue *d,
> return NULL;
>  }
>  
> +static ir_rvalue *
> +clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
> +{
> +   assert(type->is_array());
> +
> +   const unsigned array_size = type->arrays_of_arrays_size();
> +
> +   ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
> +   max_index->type = index->type;
> +
> +   ir_constant *zero = new(mem_ctx) ir_constant(0);
> +   zero->type = index->type;
> +
> +   if (index->type->base_type == GLSL_TYPE_INT)
> +  index = max2(index, zero);
> +   index = min2(index, max_index);
> +
> +   return index;
> +}
> +
>  void
>  lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
>   ir_variable *var,
> @@ -258,6 +281,11 @@ 
> lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
>interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
> deref, _block_index);
>  
> +   if (nonconst_block_index && clamp_block_indices) {
> +  nonconst_block_index =
> + clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
> +   }
> +
> /* Locate the block by interface name */
> unsigned num_blocks;
> struct gl_uniform_block **blocks;
> @@ -1062,9 +1090,9 @@ lower_ubo_reference_visitor::visit_enter(ir_call *ir)
>  } /* unnamed namespace */
>  

Re: [Mesa-dev] [PATCH 02/12] nir: Fix fddy swizzles in nir_lower_wpos_ytransform().

2016-05-19 Thread Rob Clark
On Wed, May 18, 2016 at 6:00 PM, Kenneth Graunke  wrote:
> The original value might have been swizzled.  That's taken care of in
> the fmul source - we don't want to reswizzle it again.
>
> Fixes validation failures in glsl-derivs-varyings on a branch of mine
> which uses this pass in i965.
>
> Signed-off-by: Kenneth Graunke 

Reviewed-by: Rob Clark 

> ---
>  src/compiler/nir/nir_lower_wpos_ytransform.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c 
> b/src/compiler/nir/nir_lower_wpos_ytransform.c
> index 5546788..41f8554 100644
> --- a/src/compiler/nir/nir_lower_wpos_ytransform.c
> +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
> @@ -252,6 +252,9 @@ lower_fddy(lower_wpos_ytransform_state *state, 
> nir_alu_instr *fddy)
> nir_instr_rewrite_src(>instr,
>   >src[0].src,
>   nir_src_for_ssa(pt));
> +
> +   for (unsigned i = 0; i < 4; i++)
> +  fddy->src[0].swizzle[i] = MIN2(i, pt->num_components - 1);
>  }
>
>  static bool
> --
> 2.8.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] anv: Enable textureCompressionASTC_LDR on Gen9+

2016-05-19 Thread Nanley Chery
On Wed, May 18, 2016 at 10:18:30PM -0700, Jason Ekstrand wrote:
> Do we pass all the ASTC CTS tests?  If so,
> 
> Reviewed-by: Jason Ekstrand 
> 

We do. Thanks!

> That wasn't nearly as much work as we'd feared it would be.  Hooray for ISL!

Yes, ISL made this work quite simple.

- Nanley

> On May 18, 2016 5:33 PM, "Nanley Chery"  wrote:
> 
> > From: Nanley Chery 
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/intel/vulkan/anv_device.c  |  2 +-
> >  src/intel/vulkan/anv_formats.c | 56
> > +-
> >  2 files changed, 29 insertions(+), 29 deletions(-)
> >
> > diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> > index 5b5b095..e836ed7 100644
> > --- a/src/intel/vulkan/anv_device.c
> > +++ b/src/intel/vulkan/anv_device.c
> > @@ -381,7 +381,7 @@ void anv_GetPhysicalDeviceFeatures(
> >.samplerAnisotropy= false, /* FINISHME */
> >.textureCompressionETC2   = pdevice->info->gen >= 8
> > ||
> >
> >  pdevice->info->is_baytrail,
> > -  .textureCompressionASTC_LDR   = false, /* FINISHME */
> > +  .textureCompressionASTC_LDR   = pdevice->info->gen >=
> > 9, /* FINISHME CHV */
> >.textureCompressionBC = true,
> >.occlusionQueryPrecise= true,
> >.pipelineStatisticsQuery  = false,
> > diff --git a/src/intel/vulkan/anv_formats.c
> > b/src/intel/vulkan/anv_formats.c
> > index e2c9cd2..a920ab4 100644
> > --- a/src/intel/vulkan/anv_formats.c
> > +++ b/src/intel/vulkan/anv_formats.c
> > @@ -196,34 +196,34 @@ static const struct anv_format anv_formats[] = {
> > fmt(VK_FORMAT_EAC_R11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_R11),
> > fmt(VK_FORMAT_EAC_R11G11_UNORM_BLOCK,  ISL_FORMAT_EAC_RG11),
> > fmt(VK_FORMAT_EAC_R11G11_SNORM_BLOCK,  ISL_FORMAT_EAC_SIGNED_RG11),
> > -   fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_10x10_SRGB_BLOCK,   ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_12x10_SRGB_BLOCK,   ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_12x12_SRGB_BLOCK,   ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_4x4_UNORM_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_5x4_UNORM_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_5x5_UNORM_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_6x5_UNORM_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_6x6_UNORM_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_8x5_UNORM_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_8x6_UNORM_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_8x8_UNORM_BLOCK,ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_10x5_UNORM_BLOCK,   ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_10x6_UNORM_BLOCK,   ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_10x8_UNORM_BLOCK,   ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_10x10_UNORM_BLOCK,  ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_12x10_UNORM_BLOCK,  ISL_FORMAT_UNSUPPORTED),
> > -   fmt(VK_FORMAT_ASTC_12x12_UNORM_BLOCK,  ISL_FORMAT_UNSUPPORTED),
> > +   fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
> >  ISL_FORMAT_ASTC_LDR_2D_4X4_U8SRGB),
> > +   fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
> >  ISL_FORMAT_ASTC_LDR_2D_5X4_U8SRGB),
> > +   fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
> >  ISL_FORMAT_ASTC_LDR_2D_5X5_U8SRGB),
> > +   fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
> >  ISL_FORMAT_ASTC_LDR_2D_6X5_U8SRGB),
> > +   fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
> >  ISL_FORMAT_ASTC_LDR_2D_6X6_U8SRGB),
> > +   fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
> >  ISL_FORMAT_ASTC_LDR_2D_8X5_U8SRGB),
> > +   fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
> >  ISL_FORMAT_ASTC_LDR_2D_8X6_U8SRGB),
> > +   fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
> >  ISL_FORMAT_ASTC_LDR_2D_8X8_U8SRGB),
> > +   fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
> > ISL_FORMAT_ASTC_LDR_2D_10X5_U8SRGB),
> > +   fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
> > ISL_FORMAT_ASTC_LDR_2D_10X6_U8SRGB),
> > +   fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
> > ISL_FORMAT_ASTC_LDR_2D_10X8_U8SRGB),
> > +   

Re: [Mesa-dev] [PATCH 07/12] nir: Don't use ffma in nir_lower_wpos_ytransform().

2016-05-19 Thread Rob Clark
On Wed, May 18, 2016 at 6:00 PM, Kenneth Graunke  wrote:
> ffma is an explicitly fused multiply add with higher precision.
> The optimizer will take care of promoting mul/add to fma when
> it's beneficial to do so.
>
> This fixes failures on Gen4-5 when using this pass, as those platforms
> don't actually implement fma().

hmm, we can't rely on the opt-algebraic pass to do this?

BR,
-R

> Signed-off-by: Kenneth Graunke 
> ---
>  src/compiler/nir/nir_lower_wpos_ytransform.c | 20 
>  1 file changed, 8 insertions(+), 12 deletions(-)
>
> diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c 
> b/src/compiler/nir/nir_lower_wpos_ytransform.c
> index 36e25b9..ccf0fd3 100644
> --- a/src/compiler/nir/nir_lower_wpos_ytransform.c
> +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
> @@ -123,19 +123,15 @@ emit_wpos_adjustment(lower_wpos_ytransform_state *state,
>  * inversion/identity, or the other way around if we're drawing to an FBO.
>  */
> if (invert) {
> -  /* MAD wpos_temp.y, wpos_input, wpostrans., wpostrans.
> -   */
> -  wpos_temp_y = nir_ffma(b,
> - nir_channel(b, wpos_temp, 1),
> - nir_channel(b, wpostrans, 0),
> - nir_channel(b, wpostrans, 1));
> +  /* wpos_temp.y = wpos_input * wpostrans. + wpostrans. */
> +  wpos_temp_y = nir_fadd(b, nir_fmul(b, nir_channel(b, wpos_temp, 1),
> +nir_channel(b, wpostrans, 0)),
> +nir_channel(b, wpostrans, 1));
> } else {
> -  /* MAD wpos_temp.y, wpos_input, wpostrans., wpostrans.
> -   */
> -  wpos_temp_y = nir_ffma(b,
> - nir_channel(b, wpos_temp, 1),
> - nir_channel(b, wpostrans, 2),
> - nir_channel(b, wpostrans, 3));
> +  /* wpos_temp.y = wpos_input * wpostrans. + wpostrans. */
> +  wpos_temp_y = nir_fadd(b, nir_fmul(b, nir_channel(b, wpos_temp, 1),
> +nir_channel(b, wpostrans, 2)),
> +nir_channel(b, wpostrans, 3));
> }
>
> wpos_temp = nir_vec4(b,
> --
> 2.8.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/12] nir: Add interp_var_at_offset flipping.

2016-05-19 Thread Rob Clark
On Wed, May 18, 2016 at 6:00 PM, Kenneth Graunke  wrote:
> The Y-offset needs flipping as well, similar to ddy.
>
> Signed-off-by: Kenneth Graunke 

Reviewed-by: Rob Clark 

> ---
>  src/compiler/nir/nir_lower_wpos_ytransform.c | 21 +
>  1 file changed, 21 insertions(+)
>
> diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c 
> b/src/compiler/nir/nir_lower_wpos_ytransform.c
> index 41f8554..4dc9d95 100644
> --- a/src/compiler/nir/nir_lower_wpos_ytransform.c
> +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
> @@ -257,6 +257,25 @@ lower_fddy(lower_wpos_ytransform_state *state, 
> nir_alu_instr *fddy)
>fddy->src[0].swizzle[i] = MIN2(i, pt->num_components - 1);
>  }
>
> +/* Multiply interp_var_at_offset's offset by transform.x to flip it. */
> +static void
> +lower_interp_var_at_offset(lower_wpos_ytransform_state *state,
> +   nir_intrinsic_instr *interp)
> +{
> +   nir_builder *b = &state->b;
> +   nir_ssa_def *offset;
> +   nir_ssa_def *flip_y;
> +
> +   b->cursor = nir_before_instr(&interp->instr);
> +
> +   offset = nir_ssa_for_src(b, interp->src[0], 2);
> +   flip_y = nir_fmul(b, nir_channel(b, offset, 1),
> +nir_channel(b, get_transform(state), 0));
> +   nir_instr_rewrite_src(&interp->instr, &interp->src[0],
> + nir_src_for_ssa(nir_vec2(b, nir_channel(b, offset, 
> 0),
> + flip_y)));
> +}
> +
>  static bool
>  lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block 
> *block)
>  {
> @@ -272,6 +291,8 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state 
> *state, nir_block *block
> assert(dvar->deref.child == NULL);
> lower_fragcoord(state, intr);
>  }
> + } else if (intr->intrinsic == nir_intrinsic_interp_var_at_offset) {
> +lower_interp_var_at_offset(state, intr);
>   }
>} else if (instr->type == nir_instr_type_alu) {
>   nir_alu_instr *alu = nir_instr_as_alu(instr);
> --
> 2.8.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/12] nir: Handle fddy_fine and fddy_coarse in nir_lower_wpos_ytransform.

2016-05-19 Thread Rob Clark
On Wed, May 18, 2016 at 6:00 PM, Kenneth Graunke  wrote:
> These also need flipping!
>
> Signed-off-by: Kenneth Graunke 

Reviewed-by: Rob Clark 

> ---
>  src/compiler/nir/nir_lower_wpos_ytransform.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c 
> b/src/compiler/nir/nir_lower_wpos_ytransform.c
> index 7741df2..36e25b9 100644
> --- a/src/compiler/nir/nir_lower_wpos_ytransform.c
> +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
> @@ -297,7 +297,9 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state 
> *state, nir_block *block
>   }
>} else if (instr->type == nir_instr_type_alu) {
>   nir_alu_instr *alu = nir_instr_as_alu(instr);
> - if (alu->op == nir_op_fddy)
> + if (alu->op == nir_op_fddy ||
> + alu->op == nir_op_fddy_fine ||
> + alu->op == nir_op_fddy_coarse)
>  lower_fddy(state, alu);
>}
> }
> --
> 2.8.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/13] vbo: Declare the index range invalid for DrawIndirect

2016-05-19 Thread Iago Toral
I left a minor comment in patch 4,  but other than that patches 1-4 are:
Reviewed-by: Iago Toral Quiroga 

On Thu, 2016-05-19 at 00:20 -0700, Jason Ekstrand wrote:
> Right now, we're just setting the range to [0, MAX_UINT32] which, while
> correct isn't helpful.  With DrawIndirect, you can't really know what the
> actual range is so we may as well flag it as being an invalid range.  This
> is what we do for draws with index buffer which is similar (the indices
> aren't statically known) if a bit simpler.
> 
> Cc: "10.2" 
> ---
>  src/mesa/vbo/vbo_context.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
> index 9f807a1..ae5d265 100644
> --- a/src/mesa/vbo/vbo_context.c
> +++ b/src/mesa/vbo/vbo_context.c
> @@ -170,7 +170,7 @@ vbo_draw_indirect_prims(struct gl_context *ctx,
> }
>  
> vbo->draw_prims(ctx, prim, draw_count,
> -   ib, GL_TRUE, 0, ~0,
> +   ib, false, ~0, ~0,
> NULL, 0,
> ctx->DrawIndirectBuffer);
>  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/13] i965/draw: Delay when we get the bo for vertex buffers

2016-05-19 Thread Iago Toral
On Thu, 2016-05-19 at 00:21 -0700, Jason Ekstrand wrote:
> The previous code got the BO the first time we encountered it.  However,
> this can potentially lead to problems if the BO is used for multiple arrays
> with the same buffer object because the range we declare as busy may not be
> quite right.  By delaying the call to intel_bufferobj_buffer, we can ensure
> that we have the full range for the given buffer.
> 
> Cc: "10.2" 
> ---
>  src/mesa/drivers/dri/i965/brw_draw_upload.c | 71 
> -
>  1 file changed, 49 insertions(+), 22 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
> b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> index 3ec37f8..0a7725d 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> @@ -453,6 +453,11 @@ brw_prepare_vertices(struct brw_context *brw)
> if (brw->vb.nr_buffers)
>return;
>  
> +   /* The range of data in a given buffer represented as [min, max) */
> +   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
> +   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
> +   uint32_t buffer_range_end[VERT_ATTRIB_MAX];
> +
> for (i = j = 0; i < brw->vb.nr_enabled; i++) {
>struct brw_vertex_element *input = brw->vb.enabled[i];
>const struct gl_client_array *glarray = input->glarray;
> @@ -460,12 +465,31 @@ brw_prepare_vertices(struct brw_context *brw)
>if (_mesa_is_bufferobj(glarray->BufferObj)) {
>struct intel_buffer_object *intel_buffer =
>   intel_buffer_object(glarray->BufferObj);
> -  unsigned k;
> +
> + const uint32_t offset = (uintptr_t)glarray->Ptr;

Should we use uint64_t instead or do we know that these offsets need to
be within a 32-bit address?

> + uint32_t start, range;
> + if (glarray->InstanceDivisor) {
> +start = offset;
> +range = (glarray->StrideB * ((brw->num_instances /
> + glarray->InstanceDivisor) - 1) +
> + glarray->_ElementSize);
> + } else {
> +if (!brw->vb.index_bounds_valid) {
> +   start = 0;
> +   range = intel_buffer->Base.Size;
> +} else {
> +   start = offset + min_index * glarray->StrideB;
> +   range = (glarray->StrideB * (max_index - min_index) +
> +glarray->_ElementSize);
> +}
> + }
>  
>/* If we have a VB set to be uploaded for this buffer object
> * already, reuse that VB state so that we emit fewer
> * relocations.
> */
> +  unsigned k;
>for (k = 0; k < i; k++) {
>   const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
>   if (glarray->BufferObj == other->BufferObj &&
> @@ -475,6 +499,9 @@ brw_prepare_vertices(struct brw_context *brw)
>   {
>  input->buffer = brw->vb.enabled[k]->buffer;
>  input->offset = glarray->Ptr - other->Ptr;
> +
> +   buffer_range_start[k] = MIN2(buffer_range_start[k], start);
> +   buffer_range_end[k] = MAX2(buffer_range_end[k], start + 
> range);
>  break;
>   }
>}
> @@ -482,29 +509,13 @@ brw_prepare_vertices(struct brw_context *brw)
>   struct brw_vertex_buffer *buffer = >vb.buffers[j];
>  
>   /* Named buffer object: Just reference its contents directly. */
> - buffer->offset = (uintptr_t)glarray->Ptr;
> + buffer->offset = offset;
>   buffer->stride = glarray->StrideB;
>   buffer->step_rate = glarray->InstanceDivisor;
>  
> -uint32_t offset, size;
> -if (glarray->InstanceDivisor) {
> -   offset = buffer->offset;
> -   size = (buffer->stride * ((brw->num_instances /
> -  glarray->InstanceDivisor) - 1) +
> -   glarray->_ElementSize);
> -} else {
> -   if (!brw->vb.index_bounds_valid) {
> -  offset = 0;
> -  size = intel_buffer->Base.Size;
> -   } else {
> -  offset = buffer->offset + min_index * buffer->stride;
> -  size = (buffer->stride * (max_index - min_index) +
> -  glarray->_ElementSize);
> -   }
> -}
> -buffer->bo = intel_bufferobj_buffer(brw, intel_buffer,
> -offset, size);
> -drm_intel_bo_reference(buffer->bo);
> +enabled_buffer[j] = intel_buffer;
> +buffer_range_start[j] = start;
> +buffer_range_end[j] = start + range;
>  
>   input->buffer = j++;
>   input->offset = 0;
> @@ -519,7 +530,7 @@ brw_prepare_vertices(struct brw_context *brw)
> * probably a service to the poor programmer to do 

Re: [Mesa-dev] [PATCH] winsys/amdgpu: add back multithreaded command submission

2016-05-19 Thread Nicolai Hähnle

On 18.05.2016 13:23, Marek Olšák wrote:

On Wed, May 18, 2016 at 7:48 PM, Nicolai Hähnle  wrote:

On 18.05.2016 11:58, Marek Olšák wrote:


On Sat, May 7, 2016 at 5:12 PM, Nicolai Hähnle  wrote:


Looks good to me, just two remarks below...


On 06.05.2016 13:31, Marek Olšák wrote:



From: Marek Olšák 

Ported from the initial amdgpu winsys from the private AMD branch.

The thread creates the buffer list, submits IBs, and cleans up
the submission context, which can also destroy buffers.

3-5% reduction in CPU overhead is expected for apps submitting a lot
of IBs per frame. This is most visible with DMA IBs.
---
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c |  26 ++-
src/gallium/winsys/amdgpu/drm/amdgpu_bo.h |   4 +
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 311
+-
src/gallium/winsys/amdgpu/drm/amdgpu_cs.h |  52 +++--
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c |  61 +
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h |   9 +
6 files changed, 333 insertions(+), 130 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 37a41c0..ec5fa6a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -43,8 +43,21 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf,
uint64_t timeout,
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->ws;
+   int64_t abs_timeout;
   int i;

+   if (timeout == 0) {
+  if (p_atomic_read(&bo->num_active_ioctls))
+ return false;
+
+   } else {
+  abs_timeout = os_time_get_absolute_timeout(timeout);
+
+  /* Wait if any ioctl is being submitted with this buffer. */
+  if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls,
abs_timeout))
+ return false;
+   }




I'd suggest to do the cs_sync_flush here instead of below - there is less
action at a distance, and some additional code paths end up covered by a
flush as well.



Unfortunately, amdgpu_bo_wait is exposed via the winsys interface and
doesn't accept a CS. We could extend it to accept a CS or two, but
that would mean adding most of what r600_buffer_map_sync_with_rings is
doing. It would be a bigger cleanup and I think it should be done as a
separate patch if we wanted to go down that road.



Okay, fair enough. Let's keep an eye out for whether some use cases get into
busy waits, just in case.

The amdgpu_cs_sync_flush should be added to the other branch of the
PIPE_TRANSFER_WRITE check in amdgpu_bo_map though, right?


Yes, good catch.

Did you mean this with the semaphore?
https://cgit.freedesktop.org/~mareko/mesa/commit/?h=tmp=401abd01d5f44430df71deb74b6e78a0eac2


Yes, that looks good to me. With that change, the patch is

Reviewed-by: Nicolai Hähnle 


Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] nvc0: clear out surfaces bufctx before rebinding everything

2016-05-19 Thread Ilia Mirkin
Yes, oops. Shouldn't fix any invalidation issues. Could fix unbounded
memory growth due to switching back and forth between cp and 3d without
setting any new images. Please fold this into your series.
On May 19, 2016 4:13 AM, "Samuel Pitoiset" 
wrote:

Oops? Your patch is based on your gl43 branch with my images series but
this is not yet upstream. ;)

Anyway, the idea sounds good to me and might explain some invalidation
issues, perhaps? I'll have a look later.


On 05/19/2016 03:28 AM, Ilia Mirkin wrote:

> Otherwise we can end up in a situation where that bin just grows and
> grows.
>
> Signed-off-by: Ilia Mirkin 
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> index cc5ea5e..2523c20 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> @@ -963,6 +963,11 @@ nvc0_validate_suf(struct nvc0_context *nvc0, int s)
> struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> struct nvc0_screen *screen = nvc0->screen;
>
> +   if (s == 5)
> +  nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
> +   else
> +  nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
> +
> for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
>struct pipe_image_view *view = >images[s][i];
>int width, height, depth;
>
>
-- 
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] egl: Check if API is supported when using eglBindAPI.

2016-05-19 Thread Manolova, Plamena
On Wed, May 18, 2016 at 8:44 PM, Ian Romanick  wrote:

> On 05/18/2016 11:45 AM, Manolova, Plamena wrote:
> > Hi Ian,
> > Thanks for reviewing!
> >
> > On Wed, May 18, 2016 at 4:33 PM, Ian Romanick  > > wrote:
> >
> > On 05/17/2016 09:35 AM, Plamena Manolova wrote:
> > > According to the EGL specifications before binding an API
> > > we must check whether it's supported first. If not eglBindAPI
> > > should return EGL_FALSE and generate a EGL_BAD_PARAMETER error.
> >
> > Can you provide a spec quotation?
> >
> >
> > https://www.khronos.org/registry/egl/sdk/docs/man/html/eglBindAPI.xhtml
> >
> > "EGL_BAD_PARAMETER is generated if api is not one of the accepted
> > tokens, or if the
> > specified client API is not supported by the EGL implementation."
>
> That's the man page, not the spec.  We have found a few problems over
> the years in using man page quotations, so we generally prefer to use
> the spec.  The biggest issue is that it's harder to track changes in the
> man pages, but we can pretty easily tell when something changed between,
> say, EGL 1.2 and EGL 1.5.  If nothing else, it makes the quotation
> practice more consistent to always use the same kind of source.
>
> In Mesa, changes like this should be accompanied by a quotation, in a
> canonical format, from the specification.  Ideally, the quotation should
> go in the code being changed.  It may also be acceptable to include the
> quotation in the commit message.  The next person to look at this code
> is either going to just look at the code or look at the commit that
> added it (after using git-blame).
>
> For this case, the proper spec quotation would be:
>
> Section 3.7 (Rendering Contexts) of the EGL 1.5 spec says:
>
> "api must specify one of the supported client APIs, either
> EGL_OPENGL_API, EGL_OPENGL_ES_API, or EGL_OPENVG_API... If api
> is not one of the values specified above, or if the client API
> specified by api is not supported by the implementation, an
> EGL_BAD_PARAMETER error is generated."
>
>
Thanks for clearing this up for me Ian, I'm pretty new to Mesa so pointers
like this
are really appreciated. I'll keep it in mind for next time. I'll also include
the quotation in
the code for my follow up patch.


> > > Signed-off-by: Plamena Manolova  > >
> > > ---
> > >  src/egl/main/eglcurrent.h | 33 ++---
> > >  1 file changed, 30 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/src/egl/main/eglcurrent.h b/src/egl/main/eglcurrent.h
> > > index 1e386ac..f2e19cc 100644
> > > --- a/src/egl/main/eglcurrent.h
> > > +++ b/src/egl/main/eglcurrent.h
> > > @@ -32,7 +32,8 @@
> > >  #include "c99_compat.h"
> > >
> > >  #include "egltypedefs.h"
> > > -
> > > +#include "eglglobals.h"
> > > +#include "egldisplay.h"
> > >
> > >  #ifdef __cplusplus
> > >  extern "C" {
> > > @@ -62,14 +63,40 @@ struct _egl_thread_info
> > > EGLint CurrentAPIIndex;
> > >  };
> > >
> > > -
> > > +static inline EGLBoolean
> > > +_eglDisplaySupportsApi(_EGLDisplay *dpy, EGLenum api)
> >
> > Since this is only used internally, please use bool/true/false.
> Based
> > on my comments at the bottom, I think this function should go
> directly
> > in eglapi.c.
> >
> > > +{
> >
> > This is a really complex way of doing something quite simple.  How
> > about.
> >
> >unsigned api_bit;
> >
> >if (!dpy->Initialized)
> >   return false:
> >
> >switch (api) {
> >case EGL_OPENGL_API:
> >   api_bit = EGL_OPENGL_BIT;
> >   break;
> >case EGL_OPENGL_ES_API:
> >   api_bit = EGL_OPENGL_ES_BIT |
> > EGL_OPENGL_ES2_BIT |
> > EGL_OPENGL_ES3_BIT_KHR;
> >   break;
> >case EGL_OPENVG_API:
> >   api_bit = EGL_OPENVG_BIT;
> >   break;
> >default:
> >   api_bit = 0;
> >   break;
> >}
> >
> >return (dpy->ClientAPIs & api_bit) != 0;
> >
> >
> > I'll make those changes.
> >
> >
> >
> > > +   if (!dpy->Initialized) {
> > > +  return EGL_FALSE;
> > > +   } else if (api == EGL_OPENGL_API && dpy->ClientAPIs &
> EGL_OPENGL_BIT) {
> > > +  return EGL_TRUE;
> > > +   } else if (api == EGL_OPENGL_ES_API &&
> > > +  (dpy->ClientAPIs & EGL_OPENGL_ES_BIT ||
> > > +   dpy->ClientAPIs & EGL_OPENGL_ES2_BIT ||
> > > +   dpy->ClientAPIs & EGL_OPENGL_ES3_BIT_KHR)) {
> > > +  return EGL_TRUE;
> > > +   } else if (api == EGL_OPENVG_API && dpy->ClientAPIs &
> EGL_OPENVG_BIT) {
> > > +  return EGL_TRUE;
> > > +   } else {
> > > +  return EGL_FALSE;
> > > +   }
> > > +}
> > >  /**
> > >   * Return true 

Re: [Mesa-dev] [PATCH 01/13] vbo: Declare the index range invalid for DrawIndirect

2016-05-19 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Thu, May 19, 2016 at 9:20 AM, Jason Ekstrand  wrote:
> Right now, we're just setting the range to [0, MAX_UINT32] which, while
> correct isn't helpful.  With DrawIndirect, you can't really know what the
> actual range is so we may as well flag it as being an invalid range.  This
> is what we do for draws with index buffer which is similar (the indices
> aren't statically known) if a bit simpler.
>
> Cc: "10.2" 
> ---
>  src/mesa/vbo/vbo_context.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
> index 9f807a1..ae5d265 100644
> --- a/src/mesa/vbo/vbo_context.c
> +++ b/src/mesa/vbo/vbo_context.c
> @@ -170,7 +170,7 @@ vbo_draw_indirect_prims(struct gl_context *ctx,
> }
>
> vbo->draw_prims(ctx, prim, draw_count,
> -   ib, GL_TRUE, 0, ~0,
> +   ib, false, ~0, ~0,
> NULL, 0,
> ctx->DrawIndirectBuffer);
>
> --
> 2.5.0.400.gff86faf
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/13] vbo: Declare the index range invalid for DrawTransformFeedback

2016-05-19 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Thu, May 19, 2016 at 9:20 AM, Jason Ekstrand  wrote:
> Right now, we're setting the range to [0, 0] which is obviously bogus.
> Instead, we should set it to be invalid like we do for DrawIndirect.
>
> Cc: "10.2" 
> ---
>  src/mesa/vbo/vbo_exec_array.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
> index 3b45eb5..87ed7f7 100644
> --- a/src/mesa/vbo/vbo_exec_array.c
> +++ b/src/mesa/vbo/vbo_exec_array.c
> @@ -1323,7 +1323,7 @@ vbo_draw_transform_feedback(struct gl_context *ctx, 
> GLenum mode,
>  * will be rendered. */
>
> vbo->draw_prims(ctx, prim, 1, NULL,
> -   GL_TRUE, 0, 0, obj, stream, NULL);
> +   GL_FALSE, ~0, ~0, obj, stream, NULL);
>
> if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) {
>_mesa_flush(ctx);
> --
> 2.5.0.400.gff86faf
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/10] radeonsi: move code for setting one shader image into separate function

2016-05-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_descriptors.c | 144 ++
 1 file changed, 75 insertions(+), 69 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 48b1e14..d264ae7 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -501,91 +501,97 @@ si_disable_shader_image(struct si_images_info *images, 
unsigned slot)
}
 }
 
-static void
-si_set_shader_images(struct pipe_context *pipe, unsigned shader,
-unsigned start_slot, unsigned count,
-struct pipe_image_view *views)
+static void si_set_shader_image(struct si_context *ctx,
+   struct si_images_info *images,
+   unsigned slot, struct pipe_image_view *view)
 {
-   struct si_context *ctx = (struct si_context *)pipe;
struct si_screen *screen = ctx->screen;
-   struct si_images_info *images = >images[shader];
-   unsigned i, slot;
-
-   assert(shader < SI_NUM_SHADERS);
+   struct r600_resource *res;
 
-   if (!count)
+   if (!view || !view->resource) {
+   si_disable_shader_image(images, slot);
return;
+   }
 
-   assert(start_slot + count <= SI_NUM_IMAGES);
+   res = (struct r600_resource *)view->resource;
+   util_copy_image_view(>views[slot], view);
 
-   for (i = 0, slot = start_slot; i < count; ++i, ++slot) {
-   struct r600_resource *res;
+   si_sampler_view_add_buffer(ctx, >b.b,
+  RADEON_USAGE_READWRITE);
 
-   if (!views || !views[i].resource) {
-   si_disable_shader_image(images, slot);
-   continue;
-   }
+   if (res->b.b.target == PIPE_BUFFER) {
+   si_make_buffer_descriptor(screen, res,
+ view->format,
+ view->u.buf.first_element,
+ view->u.buf.last_element,
+ images->desc.list + slot * 8);
+   images->compressed_colortex_mask &= ~(1 << slot);
+   } else {
+   static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
+   struct r600_texture *tex = (struct r600_texture *)res;
+   unsigned level;
+   unsigned width, height, depth;
+   uint32_t *desc = images->desc.list + slot * 8;
 
-   res = (struct r600_resource *)views[i].resource;
-   util_copy_image_view(>views[slot], [i]);
+   assert(!tex->is_depth);
+   assert(tex->fmask.size == 0);
 
-   si_sampler_view_add_buffer(ctx, >b.b,
-  RADEON_USAGE_READWRITE);
+   if (tex->dcc_offset &&
+   view->access & PIPE_IMAGE_ACCESS_WRITE)
+   r600_texture_disable_dcc(>b, tex);
 
-   if (res->b.b.target == PIPE_BUFFER) {
-   si_make_buffer_descriptor(screen, res,
- views[i].format,
- views[i].u.buf.first_element,
- views[i].u.buf.last_element,
- images->desc.list + slot * 8);
-   images->compressed_colortex_mask &= ~(1 << slot);
+   if (is_compressed_colortex(tex)) {
+   images->compressed_colortex_mask |= 1 << slot;
} else {
-   static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
-   struct r600_texture *tex = (struct r600_texture *)res;
-   unsigned level;
-   unsigned width, height, depth;
-   uint32_t *desc = images->desc.list + slot * 8;
+   images->compressed_colortex_mask &= ~(1 << slot);
+   }
+
+   /* Always force the base level to the selected level.
+*
+* This is required for 3D textures, where otherwise
+* selecting a single slice for non-layered bindings
+* fails. It doesn't hurt the other targets.
+*/
+   level = view->u.tex.level;
+   width = u_minify(res->b.b.width0, level);
+   height = u_minify(res->b.b.height0, level);
+   depth = u_minify(res->b.b.depth0, level);
+
+   si_make_texture_descriptor(screen, tex,
+  false, res->b.b.target,
+  view->format, swizzle,
+  0, 0,
+  

[Mesa-dev] [PATCH 08/10] gallium/radeon: degrade tiled textures mapped often to linear

2016-05-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.h |   1 +
 src/gallium/drivers/radeon/r600_texture.c | 102 ++
 2 files changed, 103 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 7713233..ccdf22a 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -236,6 +236,7 @@ struct r600_texture {
struct r600_resourceresource;
 
uint64_tsize;
+   unsignednum_level0_transfers;
boolis_depth;
unsigneddirty_level_mask; /* each bit says if 
that mipmap is compressed */
unsignedstencil_dirty_level_mask; /* each bit 
says if that mipmap is compressed */
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index ef5c113..5a0bd23 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -36,6 +36,8 @@ static void r600_texture_discard_dcc(struct 
r600_common_screen *rscreen,
 struct r600_texture *rtex);
 static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
   struct r600_texture *rtex);
+static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
+  const struct pipe_resource *templ);
 
 
 bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
@@ -403,6 +405,74 @@ void r600_texture_disable_dcc(struct r600_common_screen 
*rscreen,
r600_texture_discard_dcc(rscreen, rtex);
 }
 
+static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx,
+struct r600_texture *rtex,
+bool invalidate_storage)
+{
+   struct pipe_screen *screen = rctx->b.screen;
+   struct r600_texture *new_tex;
+   struct pipe_resource templ = rtex->resource.b.b;
+   unsigned i;
+
+   templ.bind |= PIPE_BIND_LINEAR;
+
+   /* r600g doesn't react to dirty_tex_descriptor_counter */
+   if (rctx->chip_class < SI)
+   return;
+
+   if (rtex->resource.is_shared ||
+   rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED)
+   return;
+
+   /* This fails with MSAA, depth, and compressed textures. */
+   if (r600_choose_tiling(rctx->screen, ) !=
+   RADEON_SURF_MODE_LINEAR_ALIGNED)
+   return;
+
+   new_tex = (struct r600_texture*)screen->resource_create(screen, );
+   if (!new_tex)
+   return;
+
+   /* Copy the pixels to the new texture. */
+   if (!invalidate_storage) {
+   for (i = 0; i <= templ.last_level; i++) {
+   struct pipe_box box;
+
+   u_box_3d(0, 0, 0,
+u_minify(templ.width0, i), 
u_minify(templ.height0, i),
+util_max_layer(, i) + 1, );
+
+   rctx->dma_copy(>b, _tex->resource.b.b, i, 0, 
0, 0,
+  >resource.b.b, i, );
+   }
+   }
+
+   r600_texture_discard_cmask(rctx->screen, rtex);
+   r600_texture_discard_dcc(rctx->screen, rtex);
+
+   /* Replace the structure fields of rtex. */
+   rtex->resource.b.b.bind = templ.bind;
+   pb_reference(>resource.buf, new_tex->resource.buf);
+   rtex->resource.gpu_address = new_tex->resource.gpu_address;
+   rtex->resource.domains = new_tex->resource.domains;
+   rtex->size = new_tex->size;
+   rtex->surface = new_tex->surface;
+   rtex->non_disp_tiling = new_tex->non_disp_tiling;
+   rtex->cb_color_info = new_tex->cb_color_info;
+   rtex->cmask = new_tex->cmask; /* needed even without CMASK */
+
+   assert(!rtex->htile_buffer);
+   assert(!rtex->cmask.size);
+   assert(!rtex->fmask.size);
+   assert(!rtex->dcc_offset);
+   assert(!rtex->is_depth);
+
+   pipe_resource_reference((struct pipe_resource**)_tex, NULL);
+
+   r600_dirty_all_framebuffer_states(rctx->screen);
+   p_atomic_inc(>screen->dirty_tex_descriptor_counter);
+}
+
 static boolean r600_texture_get_handle(struct pipe_screen* screen,
   struct pipe_resource *resource,
   struct winsys_handle *whandle,
@@ -1216,6 +1286,22 @@ static void r600_init_temp_resource_from_box(struct 
pipe_resource *res,
}
 }
 
+static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
+   struct r600_texture *rtex,
+   unsigned transfer_usage,
+   const struct 

[Mesa-dev] [PATCH 10/10] gallium/radeon: lower memory usage during texture transfers

2016-05-19 Thread Marek Olšák
From: Marek Olšák 

This improves throughput by keeping TTM overhead down.

Some piglit tests such as texelFetch and streaming-texture-leak will
use less memory now.
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_texture.c | 32 +++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index ccdf22a..4cdc69a 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -496,6 +496,7 @@ struct r600_common_context {
/* Misc stats. */
unsignednum_draw_calls;
unsignednum_dma_calls;
+   uint64_tnum_alloc_tex_transfer_bytes;
 
/* Render condition. */
struct r600_atomrender_cond_atom;
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 1333a25..1f973cc 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1302,9 +1302,11 @@ static bool r600_can_invalidate_texture(struct 
r600_common_screen *rscreen,
 box->depth);
 }
 
-static void r600_texture_invalidate_storage(struct r600_common_screen *rscreen,
+static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
struct r600_texture *rtex)
 {
+   struct r600_common_screen *rscreen = rctx->screen;
+
/* There is no point in discarding depth and tiled buffers. */
assert(!rtex->is_depth);
assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED);
@@ -1319,6 +1321,8 @@ static void r600_texture_invalidate_storage(struct 
r600_common_screen *rscreen,
 
r600_dirty_all_framebuffer_states(rscreen);
p_atomic_inc(>dirty_tex_descriptor_counter);
+
+   rctx->num_alloc_tex_transfer_bytes += rtex->size;
 }
 
 static void *r600_texture_transfer_map(struct pipe_context *ctx,
@@ -1378,8 +1382,7 @@ static void *r600_texture_transfer_map(struct 
pipe_context *ctx,
/* It's busy. */
if (r600_can_invalidate_texture(rctx->screen, rtex,
usage, box))
-   r600_texture_invalidate_storage(rctx->screen,
-   rtex);
+   r600_texture_invalidate_storage(rctx, rtex);
else
use_staging_texture = true;
}
@@ -1499,6 +1502,7 @@ static void *r600_texture_transfer_map(struct 
pipe_context *ctx,
 static void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer)
 {
+   struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct pipe_resource *texture = transfer->resource;
struct r600_texture *rtex = (struct r600_texture*)texture;
@@ -1514,8 +1518,28 @@ static void r600_texture_transfer_unmap(struct 
pipe_context *ctx,
}
}
 
-   if (rtransfer->staging)
+   if (rtransfer->staging) {
+   rctx->num_alloc_tex_transfer_bytes += 
rtransfer->staging->buf->size;
pipe_resource_reference((struct 
pipe_resource**)>staging, NULL);
+   }
+
+   /* Heuristic for {upload, draw, upload, draw, ..}:
+*
+* Flush the gfx IB if we've allocated too much texture storage.
+*
+* The idea is that we don't want to build IBs that use too much
+* memory and put pressure on the kernel memory manager and we also
+* want to make temporary and invalidated buffers go idle ASAP to
+* decrease the total memory usage or make them reusable. The memory
+* usage will be slightly higher than given here because of the buffer
+* cache in the winsys.
+*
+* The result is that the kernel memory manager is never a bottleneck.
+*/
+   if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.vram_size / 
4) {
+   rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+   rctx->num_alloc_tex_transfer_bytes = 0;
+   }
 
FREE(transfer);
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/10] gallium/radeon: clean up and better comment use_staging_texture

2016-05-19 Thread Marek Olšák
From: Marek Olšák 

Next commits will add other things around this.
---
 src/gallium/drivers/radeon/r600_texture.c | 42 +--
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 43d130d..ef5c113 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1226,31 +1226,35 @@ static void *r600_texture_transfer_map(struct 
pipe_context *ctx,
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_texture *rtex = (struct r600_texture*)texture;
struct r600_transfer *trans;
-   boolean use_staging_texture = FALSE;
struct r600_resource *buf;
unsigned offset = 0;
char *map;
+   bool use_staging_texture = false;
 
assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER));
 
-   /* We cannot map a tiled texture directly because the data is
-* in a different order, therefore we do detiling using a blit.
-*
-* Also, use a temporary in GTT memory for read transfers, as
-* the CPU is much happier reading out of cached system memory
-* than uncached VRAM.
-*/
-   if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
-   use_staging_texture = TRUE;
-   } else if ((usage & PIPE_TRANSFER_READ) &&
-  rtex->resource.domains & RADEON_DOMAIN_VRAM) {
-   /* Untiled buffers in VRAM, which is slow for CPU reads */
-   use_staging_texture = TRUE;
-   } else if (!(usage & PIPE_TRANSFER_READ) &&
-   (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf, 
RADEON_USAGE_READWRITE) ||
-!rctx->ws->buffer_wait(rtex->resource.buf, 0, 
RADEON_USAGE_READWRITE))) {
-   /* Use a staging texture for uploads if the underlying BO is 
busy. */
-   use_staging_texture = TRUE;
+   /* Depth textures use staging unconditionally. */
+   if (!rtex->is_depth) {
+   /* Tiled textures need to be converted into a linear texture 
for CPU
+* access. The staging texture is always linear and is placed 
in GART.
+*
+* Reading from VRAM is slow, always use the staging texture in
+* this case.
+*
+* Use the staging texture for uploads if the underlying BO
+* is busy.
+*/
+   if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D)
+   use_staging_texture = true;
+   else if (usage & PIPE_TRANSFER_READ)
+   use_staging_texture = (rtex->resource.domains &
+  RADEON_DOMAIN_VRAM) != 0;
+   /* Write & linear only: */
+   else if (r600_rings_is_buffer_referenced(rctx, 
rtex->resource.buf,
+
RADEON_USAGE_READWRITE) ||
+!rctx->ws->buffer_wait(rtex->resource.buf, 0,
+   RADEON_USAGE_READWRITE))
+   use_staging_texture = true;
}
 
trans = CALLOC_STRUCT(r600_transfer);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/10] radeonsi: set some colorbuffer register fields at emit time

2016-05-19 Thread Marek Olšák
From: Marek Olšák 

to allow reallocating the texture storage with different parameters
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_texture.c |  2 +
 src/gallium/drivers/radeonsi/si_state.c   | 94 +--
 3 files changed, 47 insertions(+), 50 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index e1a2efb..7713233 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -264,6 +264,7 @@ struct r600_texture {
 
 struct r600_surface {
struct pipe_surface base;
+   const struct radeon_surf_level  *level_info;
 
bool color_initialized;
bool depth_initialized;
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 23c483b..43d130d 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1403,6 +1403,7 @@ struct pipe_surface *r600_create_surface_custom(struct 
pipe_context *pipe,
const struct pipe_surface 
*templ,
unsigned width, unsigned height)
 {
+   struct r600_texture *rtex = (struct r600_texture*)texture;
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
 
if (!surface)
@@ -1418,6 +1419,7 @@ struct pipe_surface *r600_create_surface_custom(struct 
pipe_context *pipe,
surface->base.width = width;
surface->base.height = height;
surface->base.u = templ->u;
+   surface->level_info = >surface.level[templ->u.tex.level];
return >base;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 45b4021..b685295 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1960,10 +1960,7 @@ static void si_initialize_color_surface(struct 
si_context *sctx,
 {
struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
unsigned level = surf->base.u.tex.level;
-   uint64_t offset = rtex->surface.level[level].offset;
-   unsigned pitch, slice;
-   unsigned color_info, color_attrib, color_pitch, color_view;
-   unsigned tile_mode_index;
+   unsigned color_info, color_attrib, color_view;
unsigned format, swap, ntype, endian;
const struct util_format_description *desc;
int i;
@@ -1972,14 +1969,6 @@ static void si_initialize_color_surface(struct 
si_context *sctx,
color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
 
-   pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
-   slice = (rtex->surface.level[level].nblk_x * 
rtex->surface.level[level].nblk_y) / 64;
-   if (slice) {
-   slice = slice - 1;
-   }
-
-   tile_mode_index = si_tile_mode_index(rtex, level, false);
-
desc = util_format_description(surf->base.format);
for (i = 0; i < 4; i++) {
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
@@ -2045,12 +2034,9 @@ static void si_initialize_color_surface(struct 
si_context *sctx,
S_028C70_NUMBER_TYPE(ntype) |
S_028C70_ENDIAN(endian);
 
-   color_pitch = S_028C64_TILE_MAX(pitch);
-
/* Intensity is implemented as Red, so treat it that way. */
-   color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
-   S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
-  
util_format_is_intensity(surf->base.format));
+   color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == 
PIPE_SWIZZLE_1 ||
+ 
util_format_is_intensity(surf->base.format));
 
if (rtex->resource.b.b.nr_samples > 1) {
unsigned log_samples = 
util_logbase2(rtex->resource.b.b.nr_samples);
@@ -2062,23 +2048,13 @@ static void si_initialize_color_surface(struct 
si_context *sctx,
color_info |= S_028C70_COMPRESSION(1);
unsigned fmask_bankh = 
util_logbase2(rtex->fmask.bank_height);
 
-   color_attrib |= 
S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index);
-
if (sctx->b.chip_class == SI) {
/* due to a hw bug, FMASK_BANK_HEIGHT must be 
set on SI too */
color_attrib |= 
S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
}
-   if (sctx->b.chip_class >= CIK) {
-   color_pitch |= 
S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch_in_pixels / 8 - 1);
-   }
}
}
 
-   offset += 

[Mesa-dev] [PATCH 01/10] gallium/util: add util_texrange_covers_whole_level from radeon

2016-05-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/auxiliary/util/u_inlines.h| 12 
 src/gallium/drivers/radeon/r600_texture.c | 23 ++-
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_inlines.h 
b/src/gallium/auxiliary/util/u_inlines.h
index 07c058a..b733c9f 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -650,6 +650,18 @@ util_max_layer(const struct pipe_resource *r, unsigned 
level)
}
 }
 
+static inline bool
+util_texrange_covers_whole_level(const struct pipe_resource *tex,
+ unsigned level, unsigned x, unsigned y,
+ unsigned z, unsigned width,
+ unsigned height, unsigned depth)
+{
+   return x == 0 && y == 0 && z == 0 &&
+  width == u_minify(tex->width0, level) &&
+  height == u_minify(tex->height0, level) &&
+  depth == util_max_layer(tex, level) + 1;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index e2955aa..db63beb 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -38,17 +38,6 @@ static void r600_texture_discard_cmask(struct 
r600_common_screen *rscreen,
   struct r600_texture *rtex);
 
 
-static bool range_covers_whole_texture(struct pipe_resource *tex,
-  unsigned level, unsigned x, unsigned y,
-  unsigned z, unsigned width,
-  unsigned height, unsigned depth)
-{
-   return x == 0 && y == 0 && z == 0 &&
-  width == u_minify(tex->width0, level) &&
-  height == u_minify(tex->height0, level) &&
-  depth == util_max_layer(tex, level) + 1;
-}
-
 bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
   struct r600_texture *rdst,
   unsigned dst_level, unsigned dstx,
@@ -87,9 +76,9 @@ bool r600_prepare_for_dma_blit(struct r600_common_context 
*rctx,
if (rdst->dcc_offset) {
/* We can't discard DCC if the texture has been exported. */
if (rdst->resource.is_shared ||
-   !range_covers_whole_texture(>resource.b.b, dst_level,
-   dstx, dsty, dstz, 
src_box->width,
-   src_box->height, 
src_box->depth))
+   !util_texrange_covers_whole_level(>resource.b.b, 
dst_level,
+ dstx, dsty, dstz, 
src_box->width,
+ src_box->height, 
src_box->depth))
return false;
 
r600_texture_discard_dcc(rctx->screen, rdst);
@@ -101,9 +90,9 @@ bool r600_prepare_for_dma_blit(struct r600_common_context 
*rctx,
 *SDMA. Otherwise, use the 3D path.
 */
if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) {
-   if (!range_covers_whole_texture(>resource.b.b, dst_level,
-   dstx, dsty, dstz, 
src_box->width,
-   src_box->height, 
src_box->depth))
+   if (!util_texrange_covers_whole_level(>resource.b.b, 
dst_level,
+ dstx, dsty, dstz, 
src_box->width,
+ src_box->height, 
src_box->depth))
return false;
 
r600_texture_discard_cmask(rctx->screen, rdst);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] gallium/radeon: strengthen some checking for DMA preparation

2016-05-19 Thread Marek Olšák
From: Marek Olšák 

Just for consistency. This doesn't fix anything, because DCC is not
supported with mipmapped textures.
---
 src/gallium/drivers/radeon/r600_texture.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index db63beb..23c483b 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -74,8 +74,11 @@ bool r600_prepare_for_dma_blit(struct r600_common_context 
*rctx,
return false;
 
if (rdst->dcc_offset) {
-   /* We can't discard DCC if the texture has been exported. */
+   /* We can't discard DCC if the texture has been exported.
+* Also, we can't discard DCC for one level only.
+*/
if (rdst->resource.is_shared ||
+   rdst->resource.b.b.last_level > 0 ||
!util_texrange_covers_whole_level(>resource.b.b, 
dst_level,
  dstx, dsty, dstz, 
src_box->width,
  src_box->height, 
src_box->depth))
@@ -90,6 +93,8 @@ bool r600_prepare_for_dma_blit(struct r600_common_context 
*rctx,
 *SDMA. Otherwise, use the 3D path.
 */
if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) {
+   /* The CMASK clear is only enabled for the first level. */
+   assert(dst_level == 0);
if (!util_texrange_covers_whole_level(>resource.b.b, 
dst_level,
  dstx, dsty, dstz, 
src_box->width,
  src_box->height, 
src_box->depth))
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/10] radeonsi: set some image descriptor fields at bind time

2016-05-19 Thread Marek Olšák
From: Marek Olšák 

mainly the fields that can change by reallocating a texture and changing
the tile mode
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 64 +
 src/gallium/drivers/radeonsi/si_pipe.h|  3 +
 src/gallium/drivers/radeonsi/si_state.c   | 99 ---
 src/gallium/drivers/radeonsi/si_state.h   | 16 -
 4 files changed, 111 insertions(+), 71 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 855b79e..48b1e14 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -60,6 +60,7 @@
 #include "si_shader.h"
 #include "sid.h"
 
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_suballoc.h"
@@ -294,40 +295,70 @@ static void si_sampler_views_begin_new_cs(struct 
si_context *sctx,
  RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
 }
 
+void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
+   const struct radeon_surf_level 
*base_level_info,
+   unsigned base_level, unsigned block_width,
+   bool is_stencil, uint32_t *state)
+{
+   uint64_t va = tex->resource.gpu_address + base_level_info->offset;
+   unsigned pitch = base_level_info->nblk_x * block_width;
+
+   state[1] &= C_008F14_BASE_ADDRESS_HI;
+   state[3] &= C_008F1C_TILING_INDEX;
+   state[4] &= C_008F20_PITCH;
+   state[6] &= C_008F28_COMPRESSION_EN;
+
+   state[0] = va >> 8;
+   state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
+   state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level,
+is_stencil));
+   state[4] |= S_008F20_PITCH(pitch - 1);
+
+   if (tex->dcc_offset) {
+   state[6] |= S_008F28_COMPRESSION_EN(1);
+   state[7] = (tex->resource.gpu_address +
+   tex->dcc_offset +
+   base_level_info->dcc_offset) >> 8;
+   }
+}
+
 static void si_set_sampler_view(struct si_context *sctx,
struct si_sampler_views *views,
unsigned slot, struct pipe_sampler_view *view)
 {
struct si_sampler_view *rview = (struct si_sampler_view*)view;
 
-   if (view && view->texture && view->texture->target != PIPE_BUFFER &&
-   G_008F28_COMPRESSION_EN(rview->state[6]) &&
-   ((struct r600_texture*)view->texture)->dcc_offset == 0) {
-   rview->state[6] &= C_008F28_COMPRESSION_EN &
-  C_008F28_ALPHA_IS_ON_MSB;
-   } else if (views->views[slot] == view)
+   if (views->views[slot] == view)
return;
 
if (view) {
struct r600_texture *rtex = (struct r600_texture 
*)view->texture;
+   uint32_t *desc = views->desc.list + slot * 16;
 
si_sampler_view_add_buffer(sctx, view->texture,
   RADEON_USAGE_READ);
 
pipe_sampler_view_reference(>views[slot], view);
-   memcpy(views->desc.list + slot * 16, rview->state, 8*4);
+   memcpy(desc, rview->state, 8*4);
+
+   if (view->texture && view->texture->target != PIPE_BUFFER)
+   si_set_mutable_tex_desc_fields(rtex,
+  rview->base_level_info,
+  rview->base_level,
+  rview->block_width,
+  false, desc);
 
if (view->texture && view->texture->target != PIPE_BUFFER &&
rtex->fmask.size) {
-   memcpy(views->desc.list + slot*16 + 8,
+   memcpy(desc + 8,
   rview->fmask_state, 8*4);
} else {
/* Disable FMASK and bind sampler state in [12:15]. */
-   memcpy(views->desc.list + slot*16 + 8,
+   memcpy(desc + 8,
   null_texture_descriptor, 4*4);
 
if (views->sampler_states[slot])
-   memcpy(views->desc.list + slot*16 + 12,
+   memcpy(desc + 12,
   views->sampler_states[slot], 4*4);
}
 
@@ -513,6 +544,7 @@ si_set_shader_images(struct pipe_context *pipe, unsigned 
shader,
struct r600_texture *tex = (struct r600_texture *)res;
unsigned level;
unsigned width, height, depth;
+   uint32_t *desc = images->desc.list + slot * 8;
 
 

[Mesa-dev] [PATCH 09/10] gallium/radeon: invalidate busy linear textures for whole-texture uploads

2016-05-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 30 --
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 5a0bd23..1333a25 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1302,6 +1302,25 @@ static bool r600_can_invalidate_texture(struct 
r600_common_screen *rscreen,
 box->depth);
 }
 
+static void r600_texture_invalidate_storage(struct r600_common_screen *rscreen,
+   struct r600_texture *rtex)
+{
+   /* There is no point in discarding depth and tiled buffers. */
+   assert(!rtex->is_depth);
+   assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED);
+
+   /* Reallocate the buffer in the same pipe_resource. */
+   r600_init_resource(rscreen, >resource, rtex->size,
+  rtex->surface.bo_alignment);
+
+   /* Initialize the CMASK base address (needed even without CMASK). */
+   rtex->cmask.base_address_reg =
+   (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
+
+   r600_dirty_all_framebuffer_states(rscreen);
+   p_atomic_inc(>dirty_tex_descriptor_counter);
+}
+
 static void *r600_texture_transfer_map(struct pipe_context *ctx,
   struct pipe_resource *texture,
   unsigned level,
@@ -1355,8 +1374,15 @@ static void *r600_texture_transfer_map(struct 
pipe_context *ctx,
else if (r600_rings_is_buffer_referenced(rctx, 
rtex->resource.buf,
 
RADEON_USAGE_READWRITE) ||
 !rctx->ws->buffer_wait(rtex->resource.buf, 0,
-   RADEON_USAGE_READWRITE))
-   use_staging_texture = true;
+   RADEON_USAGE_READWRITE)) {
+   /* It's busy. */
+   if (r600_can_invalidate_texture(rctx->screen, rtex,
+   usage, box))
+   r600_texture_invalidate_storage(rctx->screen,
+   rtex);
+   else
+   use_staging_texture = true;
+   }
}
 
trans = CALLOC_STRUCT(r600_transfer);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/10] radeonsi: implement global resetting of texture descriptors

2016-05-19 Thread Marek Olšák
From: Marek Olšák 

it will be used by texture reallocation
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  7 
 src/gallium/drivers/radeonsi/si_descriptors.c | 53 ---
 src/gallium/drivers/radeonsi/si_state.h   |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c  |  9 -
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 3e54534..e1a2efb 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -363,6 +363,12 @@ struct r600_common_screen {
 */
unsignedcompressed_colortex_counter;
 
+   /* Atomically increment this counter when an existing texture's
+* backing buffer or tile mode parameters have changed that requires
+* recomputation of shader descriptors.
+*/
+   unsigneddirty_tex_descriptor_counter;
+
void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
  struct r600_texture *rtex,
  struct radeon_bo_metadata *md);
@@ -455,6 +461,7 @@ struct r600_common_context {
unsignedgpu_reset_counter;
unsignedlast_dirty_fb_counter;
unsignedlast_compressed_colortex_counter;
+   unsignedlast_dirty_tex_descriptor_counter;
 
struct u_upload_mgr *uploader;
struct u_suballocator   *allocator_so_filled_size;
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index d264ae7..e07252c 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -324,11 +324,12 @@ void si_set_mutable_tex_desc_fields(struct r600_texture 
*tex,
 
 static void si_set_sampler_view(struct si_context *sctx,
struct si_sampler_views *views,
-   unsigned slot, struct pipe_sampler_view *view)
+   unsigned slot, struct pipe_sampler_view *view,
+   bool disallow_early_out)
 {
struct si_sampler_view *rview = (struct si_sampler_view*)view;
 
-   if (views->views[slot] == view)
+   if (views->views[slot] == view && !disallow_early_out)
return;
 
if (view) {
@@ -398,11 +399,11 @@ static void si_set_sampler_views(struct pipe_context *ctx,
if (!views || !views[i]) {
samplers->depth_texture_mask &= ~(1u << slot);
samplers->compressed_colortex_mask &= ~(1u << slot);
-   si_set_sampler_view(sctx, >views, slot, NULL);
+   si_set_sampler_view(sctx, >views, slot, NULL, 
false);
continue;
}
 
-   si_set_sampler_view(sctx, >views, slot, views[i]);
+   si_set_sampler_view(sctx, >views, slot, views[i], 
false);
 
if (views[i]->texture && views[i]->texture->target != 
PIPE_BUFFER) {
struct r600_texture *rtex =
@@ -514,7 +515,9 @@ static void si_set_shader_image(struct si_context *ctx,
}
 
res = (struct r600_resource *)view->resource;
-   util_copy_image_view(>views[slot], view);
+
+   if (>views[slot] != view)
+   util_copy_image_view(>views[slot], view);
 
si_sampler_view_add_buffer(ctx, >b.b,
   RADEON_USAGE_READWRITE);
@@ -1353,6 +1356,46 @@ static void si_invalidate_buffer(struct pipe_context 
*ctx, struct pipe_resource
}
 }
 
+/* Update mutable image descriptor fields of all bound textures. */
+void si_update_all_texture_descriptors(struct si_context *sctx)
+{
+   unsigned shader;
+
+   for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
+   struct si_sampler_views *samplers = 
>samplers[shader].views;
+   struct si_images_info *images = >images[shader];
+   unsigned mask;
+
+   /* Images. */
+   mask = images->desc.enabled_mask;
+   while (mask) {
+   unsigned i = u_bit_scan();
+   struct pipe_image_view *view = >views[i];
+
+   if (!view->resource ||
+   view->resource->target == PIPE_BUFFER)
+   continue;
+
+   si_set_shader_image(sctx, images, i, view);
+   }
+
+   /* Sampler views. */
+   mask = samplers->desc.enabled_mask;
+   while (mask) {
+   unsigned i = u_bit_scan();
+   struct pipe_sampler_view *view = samplers->views[i];
+
+  

[Mesa-dev] [PATCH 00/10] RadeonSI: Improve texture streaming upload performance

2016-05-19 Thread Marek Olšák
Hi,

This series improves texture streaming upload performance.

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/fs: do not depend on std140 alignment rules for UBO loads

2016-05-19 Thread Iago Toral Quiroga
The previous implementation relied on the std140 alignment rules to
avoid handling misalignment in the case where we are loading more than
2 double components from a vector, which requires emitting a second load
message.

This alternative implementation deals with misalignment and is more
flexible going forward. All credit for this goes to Curro, since he
not only suggested this but also wrote the implementation in the
mailing list.
---

Curro, maybe I should make you the author and me the reviewer then? :)

 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 59 +++-
 1 file changed, 13 insertions(+), 46 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index ebcc92a..35a6aed 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3583,9 +3583,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
nir->info.num_ubos - 1);
   }
 
-  /* Number of 32-bit slots in the type */
-  unsigned type_slots = MAX2(1, type_sz(dest.type) / 4);
-
   nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
   if (const_offset == NULL) {
  fs_reg base_offset = retype(get_nir_src(instr->src[1]),
@@ -3603,55 +3600,25 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   * we let CSE deal with duplicate loads. Here we see a vector access
   * and we have to split it if necessary.
   */
- fs_reg packed_consts = vgrf(glsl_type::float_type);
- packed_consts.type = dest.type;
+ const unsigned type_size = type_sz(dest.type);
+ const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F);
+ for (unsigned c = 0; c < instr->num_components;) {
+const unsigned base = const_offset->u32[0] + c * type_size;
 
- unsigned const_offset_aligned = const_offset->u32[0] & ~15;
+/* Number of usable components in the next 16B-aligned load */
+const unsigned count = MIN2(instr->num_components - c,
+(16 - base % 16) / type_size);
 
- /* A vec4 only contains half of a dvec4, if we need more than 2
-  * components of a dvec4 we will have to issue another load for
-  * components z and w.
-  */
- int num_components;
- if (type_slots == 1)
-num_components = instr->num_components;
- else
-num_components = MIN2(2, instr->num_components);
-
- /* The computation of num_components doesn't take into account
-  * misalignment, which should be okay according to std140 vector
-  * alignment rules.
-  */
- assert(const_offset->u32[0] % 16 +
-type_sz(dest.type) * num_components <= 16);
-
- int remaining_components = instr->num_components;
- while (remaining_components > 0) {
-/* Read the vec4 from a 16-byte aligned offset */
-struct brw_reg const_offset_reg = brw_imm_ud(const_offset_aligned);
 bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- retype(packed_consts, BRW_REGISTER_TYPE_F),
- surf_index, const_offset_reg);
-
-const fs_reg consts = byte_offset(packed_consts, 
(const_offset->u32[0] % 16));
-unsigned dest_offset = instr->num_components - 
remaining_components;
+ packed_consts, surf_index, brw_imm_ud(base & ~15));
 
-/* XXX: This doesn't update the sub-16B offset across iterations of
- * the loop, which should work for std140 vector alignment rules.
- */
-assert(dest_offset == 0 || const_offset->u32[0] % 16 == 0);
+const fs_reg consts =
+   retype(byte_offset(packed_consts, base & 15), dest.type);
 
-for (int i = 0; i < num_components; i++)
-   bld.MOV(offset(dest, bld, i + dest_offset), component(consts, 
i));
+for (unsigned d = 0; d < count; d++)
+   bld.MOV(offset(dest, bld, c + d), component(consts, d));
 
-/* If this is a large enough 64-bit load, we will need to emit
- * another message
- */
-remaining_components -= num_components;
-assert(remaining_components == 0 ||
-   (remaining_components <= 2 && type_slots == 2));
-num_components = remaining_components;
-const_offset_aligned += 16;
+c += count;
  }
   }
   break;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir: Handle double-precision in fabs, frsq, fsqrt, fexp2 and flog2

2016-05-19 Thread Iago Toral Quiroga
We agreed in the list that it would be better to have these if they were
easy to implement.
---
 src/compiler/nir/nir_opcodes.py | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 8a3a80f..6dc0c90 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -153,13 +153,13 @@ unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f")
 unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
 unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
 unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
-unop("fabs", tfloat, "fabsf(src0)")
+unop("fabs", tfloat, "bit_size == 64 ? fabs(src0) : fabsf(src0)")
 unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
 unop("frcp", tfloat, "1.0f / src0")
-unop("frsq", tfloat, "1.0f / sqrtf(src0)")
-unop("fsqrt", tfloat, "sqrtf(src0)")
-unop("fexp2", tfloat, "exp2f(src0)")
-unop("flog2", tfloat, "log2f(src0)")
+unop("frsq", tfloat, "bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0)")
+unop("fsqrt", tfloat, "bit_size == 64 ? sqrt(src0) : sqrtf(src0)")
+unop("fexp2", tfloat, "bit_size == 64 ? exp2(src0) : exp2f(src0)")
+unop("flog2", tfloat, "bit_size == 64 ? log2(src0) : log2f(src0)")
 unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion.
 unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion
 unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion.
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] AMDGPU SI userspace support - experimental

2016-05-19 Thread Marek Olšák
Hi,

The following branches add SI support to the amdgpu side of our
graphics driver stack.

git://people.freedesktop.org/~mareko/mesa amdgpu-si
git://people.freedesktop.org/~mareko/libdrm amdgpu-si
git://people.freedesktop.org/~mareko/xf86-video-amdgpu amdgpu-si

Thanks to Ronie Salgado  for starting this.

This should be considered a work in progress, because it's not in a
working state yet. Below is a summary of what works and what doesn't.

The current kernel support in the drm-next-4.8-wip-si branch doesn't
work. It hangs or deadlocks on the first command submission that only
exercises EVENT_WRITE(ZPASS_DONE) and doesn't use draw packets. I
think it's a rebase issue, because a previous version of the kernel
support (based on kernel 4.4) still works and piglit on GBM gives the
same or similar results as radeon.

The X server has never worked for me. The screen is mostly black and
contains funny colors at the bottom.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] nvc0: clear out surfaces bufctx before rebinding everything

2016-05-19 Thread Pierre Moreau
On 09:28 PM - May 18 2016, Ilia Mirkin wrote:
> Otherwise we can end up in a situation where that bin just grows and
> grows.
> 
> Signed-off-by: Ilia Mirkin 
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> index cc5ea5e..2523c20 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> @@ -963,6 +963,11 @@ nvc0_validate_suf(struct nvc0_context *nvc0, int s)
> struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> struct nvc0_screen *screen = nvc0->screen;
>  
> +   if (s == 5)

This is not specific to this patch, but after seeing various patches with a
special case for `s == 5`, wouldn’t it make sense to have a define for that
index, both to make it a bit clearer to the reader why it is handled
differently, and to avoid typos? Same with having a define for the number of
surfaces (both 3D and CP) and another one for the number of 3D surfaces (or any
other method to avoid looping too many or too few times due to a typo)?

> +  nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
> +   else
> +  nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
> +
> for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
>struct pipe_image_view *view = >images[s][i];
>int width, height, depth;
> -- 
> 2.7.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Use double-precision pow() when bit_size is 64, powf() otherwise

2016-05-19 Thread Iago Toral
I have just noticed that this was never pushed, right? I noticed this
while working on providing double-precision implementation for the other
functions discussed in the thread.

Iago

On Wed, 2016-03-23 at 20:09 -0700, Jason Ekstrand wrote:
> Is there a 64-bit pow in GLSL?  If so, this is the right
> implementation.
> 
> Reviewed-by: Jason Ekstrand 
> 
> On Mar 23, 2016 7:42 PM, "Ian Romanick"  wrote:
> From: Ian Romanick 
> 
> Found (randomly) by inspection.  Looking at the rest of the
> changes in
> this file in commit 9076c4e2, I'm certain this is what was
> intended.
> 
> Signed-off-by: Ian Romanick 
> Cc: Connor Abbott 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/compiler/nir/nir_opcodes.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/compiler/nir/nir_opcodes.py
> b/src/compiler/nir/nir_opcodes.py
> index 553f924..ac59130 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -520,7 +520,7 @@ for (int i = 0; i < 32; i += 8) {
>  }
>  """)
> 
> -binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0,
> src1) : pow(src0, src1)")
> +binop("fpow", tfloat, "", "bit_size == 64 ? pow(src0, src1) :
> powf(src0, src1)")
> 
>  binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32,
> 1, tfloat32,
>  "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x)
> << 16)")
> --
> 2.5.5
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] nvc0: account for shader-allocated local memory needs

2016-05-19 Thread Samuel Pitoiset
I wonder why we don't set up the local memory size on nv50 (maybe we 
don't need to?). Anyway, this will only affect OpenCL programs, which is 
not going to happen in the near future.


I would suggest adding a comment which explains why we need that (i.e. 
spilling).


Assuming info->bin.tlsSpace is correct, this patch is:

Reviewed-by: Samuel Pitoiset 

On 05/19/2016 03:28 AM, Ilia Mirkin wrote:

Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 2 +-
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index d33f759..80d6f38 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -371,7 +371,7 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct 
pipe_grid_info *info)
PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));

BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
-   PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
+   PUSH_DATA (push, (cp->hdr[1] & 0xf0) + align(cp->cp.lmem_size, 0x10));
PUSH_DATA (push, 0);
PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */

diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 
b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 1fe6026..7334c5f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -553,7 +553,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
desc->blockdim_z = info->block[2];

desc->shared_size = align(cp->cp.smem_size, 0x100);
-   desc->local_size_p = align(cp->cp.lmem_size, 0x10);
+   desc->local_size_p = (cp->hdr[1] & 0xf0) + align(cp->cp.lmem_size, 
0x10);
desc->local_size_n = 0;
desc->cstack_size = 0x800;
desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] nvc0: clear out surfaces bufctx before rebinding everything

2016-05-19 Thread Samuel Pitoiset
Oops? Your patch is based on your gl43 branch with my images series but 
this is not yet upstream. ;)


Anyway, the idea sounds good to me and might explain some invalidation 
issues, perhaps? I'll have a look later.


On 05/19/2016 03:28 AM, Ilia Mirkin wrote:

Otherwise we can end up in a situation where that bin just grows and
grows.

Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index cc5ea5e..2523c20 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -963,6 +963,11 @@ nvc0_validate_suf(struct nvc0_context *nvc0, int s)
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_screen *screen = nvc0->screen;

+   if (s == 5)
+  nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
+   else
+  nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
+
for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
   struct pipe_image_view *view = >images[s][i];
   int width, height, depth;



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/13] vbo: Declare the index range invalid for DrawIndirect

2016-05-19 Thread Jason Ekstrand
Right now, we're just setting the range to [0, MAX_UINT32] which, while
correct, isn't helpful.  With DrawIndirect, you can't really know what the
actual range is so we may as well flag it as being an invalid range.  This
is what we do for draws with index buffer which is similar (the indices
aren't statically known) if a bit simpler.

Cc: "10.2" 
---
 src/mesa/vbo/vbo_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index 9f807a1..ae5d265 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -170,7 +170,7 @@ vbo_draw_indirect_prims(struct gl_context *ctx,
}
 
vbo->draw_prims(ctx, prim, draw_count,
-   ib, GL_TRUE, 0, ~0,
+   ib, false, ~0, ~0,
NULL, 0,
ctx->DrawIndirectBuffer);
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/13] i965: Enable arb_robust_buffer_access_behavior on gen5+

2016-05-19 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 8b4f685..a544263 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -276,6 +276,7 @@ intelInitExtensions(struct gl_context *ctx)
_mesa_override_glsl_version(>Const);
 
if (brw->gen >= 5) {
+  ctx->Extensions.ARB_robust_buffer_access_behavior = true;
   ctx->Extensions.ARB_texture_query_levels = ctx->Const.GLSLVersion >= 130;
   ctx->Extensions.ARB_texture_query_lod = true;
   ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/13] i965/draw: Account for BaseInstance in VBO bounds

2016-05-19 Thread Jason Ekstrand
Cc: "10.2" 
---
 src/mesa/drivers/dri/i965/brw_context.h | 1 +
 src/mesa/drivers/dri/i965/brw_draw.c| 4 +++-
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index c6fb8d2..76ed1de 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1311,6 +1311,7 @@ struct brw_context
 
uint32_t num_instances;
int basevertex;
+   int baseinstance;
 
struct {
   const struct brw_l3_config *config;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 7901972..fa3ff5f 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -502,9 +502,11 @@ brw_try_draw_prims(struct gl_context *ctx,
   intel_batchbuffer_save_state(brw);
 
   if (brw->num_instances != prims[i].num_instances ||
-  brw->basevertex != prims[i].basevertex) {
+  brw->basevertex != prims[i].basevertex ||
+  brw->baseinstance != prims[i].base_instance) {
  brw->num_instances = prims[i].num_instances;
  brw->basevertex = prims[i].basevertex;
+ brw->baseinstance = prims[i].base_instance;
  if (i > 0) { /* For i == 0 we just did this before the loop */
 brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
 brw_merge_inputs(brw, arrays);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 725a65e..6d9e65e 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -473,7 +473,7 @@ brw_prepare_vertices(struct brw_context *brw)
  uint32_t range = intel_buffer->Base.Size;
  if (glarray->InstanceDivisor) {
 if (brw->num_instances) {
-   start = offset;
+   start = offset + glarray->StrideB * brw->baseinstance;
range = (glarray->StrideB * ((brw->num_instances /
  glarray->InstanceDivisor) - 1) +
 glarray->_ElementSize);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/4] anv/pipeline: Only do buffer bounds checks if robustBufferAccess is enabled

2016-05-19 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c 
b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
index 84fed0a..80ef8ee 100644
--- a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
+++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
@@ -27,6 +27,7 @@
 static void
 apply_dynamic_offsets_block(nir_block *block, nir_builder *b,
 const struct anv_pipeline_layout *layout,
+bool add_bounds_checks,
 uint32_t indices_start)
 {
struct anv_descriptor_set_layout *set_layout;
@@ -86,6 +87,9 @@ apply_dynamic_offsets_block(nir_block *block, nir_builder *b,
   nir_instr_rewrite_src(>instr, offset_src,
 nir_src_for_ssa(new_offset));
 
+  if (!add_bounds_checks)
+ continue;
+
   /* In order to avoid out-of-bounds access, we predicate */
   nir_ssa_def *pred = nir_uge(b, nir_channel(b, _load->dest.ssa, 1),
   old_offset);
@@ -138,6 +142,8 @@ anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
if (!layout || !layout->stage[shader->stage].has_dynamic_offsets)
   return;
 
+   const bool add_bounds_checks = pipeline->device->robust_buffer_access;
+
nir_foreach_function(function, shader) {
   if (!function->impl)
  continue;
@@ -147,7 +153,7 @@ anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
 
   nir_foreach_block(block, function->impl) {
  apply_dynamic_offsets_block(block, , pipeline->layout,
- shader->num_uniforms);
+ add_bounds_checks, shader->num_uniforms);
   }
 
   nir_metadata_preserve(function->impl, nir_metadata_block_index |
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/13] glsl: Add an option to clamp block indices when lowering UBO/SSBOs

2016-05-19 Thread Jason Ekstrand
This prevents array overflow when the block is actually an array of UBOs or
SSBOs.  On some hardware such as i965, such overflows can cause GPU hangs.
---
 src/compiler/glsl/ir_optimization.h   |  2 +-
 src/compiler/glsl/linker.cpp  |  3 ++-
 src/compiler/glsl/lower_ubo_reference.cpp | 36 +++
 src/mesa/drivers/dri/i965/brw_compiler.c  |  1 +
 src/mesa/main/mtypes.h|  3 +++
 5 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 5fc2740..4afa37e 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -123,7 +123,7 @@ bool lower_clip_distance(gl_shader *shader);
 void lower_output_reads(unsigned stage, exec_list *instructions);
 bool lower_packing_builtins(exec_list *instructions, int op_mask);
 void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
-void lower_ubo_reference(struct gl_shader *shader);
+void lower_ubo_reference(struct gl_shader *shader, bool clamp_block_indices);
 void lower_packed_varyings(void *mem_ctx,
unsigned locations_used, ir_variable_mode mode,
unsigned gs_input_vertices, gl_shader *shader,
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 71a71df..07c8263 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4879,7 +4879,8 @@ link_shaders(struct gl_context *ctx, struct 
gl_shader_program *prog)
  >Const.ShaderCompilerOptions[i];
 
   if (options->LowerBufferInterfaceBlocks)
- lower_ubo_reference(prog->_LinkedShaders[i]);
+ lower_ubo_reference(prog->_LinkedShaders[i],
+ options->ClampBlockIndicesToArrayBounds);
 
   if (options->LowerShaderSharedVariables)
  lower_shared_reference(prog->_LinkedShaders[i],
diff --git a/src/compiler/glsl/lower_ubo_reference.cpp 
b/src/compiler/glsl/lower_ubo_reference.cpp
index 1a0140f..749deed 100644
--- a/src/compiler/glsl/lower_ubo_reference.cpp
+++ b/src/compiler/glsl/lower_ubo_reference.cpp
@@ -44,8 +44,10 @@ namespace {
 class lower_ubo_reference_visitor :
   public lower_buffer_access::lower_buffer_access {
 public:
-   lower_ubo_reference_visitor(struct gl_shader *shader)
-   : shader(shader), struct_field(NULL), variable(NULL)
+   lower_ubo_reference_visitor(struct gl_shader *shader,
+   bool clamp_block_indices)
+   : shader(shader), clamp_block_indices(clamp_block_indices),
+ struct_field(NULL), variable(NULL)
{
}
 
@@ -104,6 +106,7 @@ public:
ir_visitor_status visit_enter(ir_call *ir);
 
struct gl_shader *shader;
+   bool clamp_block_indices;
struct gl_uniform_buffer_variable *ubo_var;
const struct glsl_struct_field *struct_field;
ir_variable *variable;
@@ -242,6 +245,26 @@ interface_field_name(void *mem_ctx, char *base_name, 
ir_rvalue *d,
return NULL;
 }
 
+static ir_rvalue *
+clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
+{
+   assert(type->is_array());
+
+   const unsigned array_size = type->arrays_of_arrays_size();
+
+   ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
+   max_index->type = index->type;
+
+   ir_constant *zero = new(mem_ctx) ir_constant(0);
+   zero->type = index->type;
+
+   if (index->type->base_type == GLSL_TYPE_INT)
+  index = max2(index, zero);
+   index = min2(index, max_index);
+
+   return index;
+}
+
 void
 lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
  ir_variable *var,
@@ -258,6 +281,11 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void 
*mem_ctx,
   interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
deref, _block_index);
 
+   if (nonconst_block_index && clamp_block_indices) {
+  nonconst_block_index =
+ clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
+   }
+
/* Locate the block by interface name */
unsigned num_blocks;
struct gl_uniform_block **blocks;
@@ -1062,9 +1090,9 @@ lower_ubo_reference_visitor::visit_enter(ir_call *ir)
 } /* unnamed namespace */
 
 void
-lower_ubo_reference(struct gl_shader *shader)
+lower_ubo_reference(struct gl_shader *shader, bool clamp_block_indices)
 {
-   lower_ubo_reference_visitor v(shader);
+   lower_ubo_reference_visitor v(shader, clamp_block_indices);
 
/* Loop over the instructions lowering references, because we take
 * a deref of a UBO array using a UBO dereference as the index will
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c 
b/src/mesa/drivers/dri/i965/brw_compiler.c
index 82131db..3f17589 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -188,6 +188,7 @@ brw_compiler_create(void *mem_ctx, const struct 

[Mesa-dev] [PATCH 09/13] i965/draw: Use the real size for index buffers

2016-05-19 Thread Jason Ekstrand
Previously, we were using the size of the whole BO which may be
substantially larger than the actual index buffer size.
---
 src/mesa/drivers/dri/i965/brw_context.h  | 1 +
 src/mesa/drivers/dri/i965/brw_draw_upload.c  | 8 ++--
 src/mesa/drivers/dri/i965/gen8_draw_upload.c | 2 +-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index d1d31e0..caaee13 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -991,6 +991,7 @@ struct brw_context
 
   /* Updates are signaled by BRW_NEW_INDEX_BUFFER. */
   drm_intel_bo *bo;
+  uint32_t size;
   GLuint type;
 
   /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 2eac385..ccdcb5a 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -1028,7 +1028,8 @@ brw_upload_indices(struct brw_context *brw)
   return;
 
ib_type_size = _mesa_sizeof_type(index_buffer->type);
-   ib_size = ib_type_size * index_buffer->count;
+   ib_size = index_buffer->count ? ib_type_size * index_buffer->count :
+   index_buffer->obj->Size;
bufferobj = index_buffer->obj;
 
/* Turn into a proper VBO:
@@ -1038,6 +1039,7 @@ brw_upload_indices(struct brw_context *brw)
*/
   intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size,
>ib.bo, );
+  brw->ib.size = brw->ib.bo->size;
} else {
   offset = (GLuint) (unsigned long) index_buffer->ptr;
 
@@ -1057,6 +1059,7 @@ brw_upload_indices(struct brw_context *brw)
 
  intel_upload_data(brw, map, ib_size, ib_type_size,
>ib.bo, );
+ brw->ib.size = brw->ib.bo->size;
 
  ctx->Driver.UnmapBuffer(ctx, bufferobj, MAP_INTERNAL);
   } else {
@@ -1066,6 +1069,7 @@ brw_upload_indices(struct brw_context *brw)
  if (bo != brw->ib.bo) {
 drm_intel_bo_unreference(brw->ib.bo);
 brw->ib.bo = bo;
+brw->ib.size = bufferobj->Size;
 drm_intel_bo_reference(bo);
  }
   }
@@ -1120,7 +1124,7 @@ brw_emit_index_buffer(struct brw_context *brw)
  0);
OUT_RELOC(brw->ib.bo,
  I915_GEM_DOMAIN_VERTEX, 0,
-brw->ib.bo->size - 1);
+brw->ib.size - 1);
ADVANCE_BATCH();
 }
 
diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c 
b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
index 722cde6..2ad2c3f 100644
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -384,7 +384,7 @@ gen8_emit_index_buffer(struct brw_context *brw)
OUT_BATCH(CMD_INDEX_BUFFER << 16 | (5 - 2));
OUT_BATCH(brw_get_index_type(index_buffer->type) | mocs_wb);
OUT_RELOC64(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
-   OUT_BATCH(brw->ib.bo->size);
+   OUT_BATCH(brw->ib.size);
ADVANCE_BATCH();
 }
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >