[Mesa-dev] [PATCH 2/5] tgsi: add support for geometry shader streams.

2019-03-28 Thread Dave Airlie
This adds support to retrieve the primitive counts
for each stream, along with the offset for each
primitive into the output array.

It also adds support for parsing the stream argument
to the emit and end instructions.

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/draw/draw_gs.c   |  6 +--
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 60 --
 src/gallium/auxiliary/tgsi/tgsi_exec.h | 14 +-
 3 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index 18af1d90667..7664ecf8f3a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -89,9 +89,9 @@ tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
 */
 
for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
-  unsigned num_verts_per_prim = machine->Primitives[prim_idx];
+  unsigned num_verts_per_prim = machine->Primitives[prim_idx][0];
   shader->primitive_lengths[prim_idx + shader->emitted_primitives] =
- machine->Primitives[prim_idx];
+ machine->Primitives[prim_idx][0];
   shader->emitted_vertices += num_verts_per_prim;
   for (j = 0; j < num_verts_per_prim; j++, current_idx++) {
  int idx = current_idx * shader->info.num_outputs;
@@ -696,7 +696,7 @@ draw_gs_init( struct draw_context *draw )
   if (!draw->gs.tgsi.machine)
  return FALSE;
 
-  draw->gs.tgsi.machine->Primitives = align_malloc(
+  draw->gs.tgsi.machine->Primitives[0] = align_malloc(
  MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
   if (!draw->gs.tgsi.machine->Primitives)
  return FALSE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a6a4d556abf..fc8f9d2d612 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -948,7 +948,22 @@ enum tgsi_exec_datatype {
 #define TEMP_OUTPUT_C  TGSI_EXEC_TEMP_OUTPUT_C
 #define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
 #define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
-
+#define TEMP_PRIMITIVE_S1_I   TGSI_EXEC_TEMP_PRIMITIVE_S1_I
+#define TEMP_PRIMITIVE_S1_C   TGSI_EXEC_TEMP_PRIMITIVE_S1_C
+#define TEMP_PRIMITIVE_S2_I   TGSI_EXEC_TEMP_PRIMITIVE_S2_I
+#define TEMP_PRIMITIVE_S2_C   TGSI_EXEC_TEMP_PRIMITIVE_S2_C
+#define TEMP_PRIMITIVE_S3_I   TGSI_EXEC_TEMP_PRIMITIVE_S3_I
+#define TEMP_PRIMITIVE_S3_C   TGSI_EXEC_TEMP_PRIMITIVE_S3_C
+
+static const struct {
+   int idx;
+   int chan;
+} temp_prim_idxs[] = {
+   { TEMP_PRIMITIVE_I, TEMP_PRIMITIVE_C },
+   { TEMP_PRIMITIVE_S1_I, TEMP_PRIMITIVE_S1_C },
+   { TEMP_PRIMITIVE_S2_I, TEMP_PRIMITIVE_S2_C },
+   { TEMP_PRIMITIVE_S3_I, TEMP_PRIMITIVE_S3_C },
+};
 
 /** The execution mask depends on the conditional mask and the loop mask */
 #define UPDATE_EXEC_MASK(MACH) \
@@ -2055,35 +2070,52 @@ exec_kill(struct tgsi_exec_machine *mach)
 }
 
 static void
-emit_vertex(struct tgsi_exec_machine *mach)
+emit_vertex(struct tgsi_exec_machine *mach,
+const struct tgsi_full_instruction *inst)
 {
+   union tgsi_exec_channel r[1];
+   unsigned stream_id;
+   unsigned *prim_count;
/* FIXME: check for exec mask correctly
unsigned i;
for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
  if ((mach->ExecMask & (1 << i)))
*/
+   IFETCH([0], 0, TGSI_CHAN_X);
+   stream_id = r[0].u[0];
+   prim_count = 
>Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
if (mach->ExecMask) {
-  if 
(mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] >= 
mach->MaxOutputVertices)
+  if (mach->Primitives[stream_id][*prim_count] >= mach->MaxOutputVertices)
  return;
 
+  if (mach->Primitives[stream_id][*prim_count] == 0)
+ mach->PrimitiveOffsets[stream_id][*prim_count] = 
mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0];
   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
-  
mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+  mach->Primitives[stream_id][*prim_count]++;
}
 }
 
 static void
-emit_primitive(struct tgsi_exec_machine *mach)
+emit_primitive(struct tgsi_exec_machine *mach,
+   const struct tgsi_full_instruction *inst)
 {
-   unsigned *prim_count = 
>Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
+   unsigned *prim_count;
+   union tgsi_exec_channel r[1];
+   unsigned stream_id = 0;
/* FIXME: check for exec mask correctly
unsigned i;
for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
  if ((mach->ExecMask & (1 << i)))
*/
+   if (inst) {
+  IFETCH([0], 0, TGSI_CHAN_X);
+  stream_id = r[0].u[0];
+   }
+   prim_count = 
>Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
if (mach->ExecMask) {
   ++(*prim_count);
   debug_assert((*prim_count * mach->NumOutputs) < 
mach->MaxGeometryShaderOutputs);
-  

[Mesa-dev] [PATCH 5/5] softpipe: add support for vertex streams

2019-03-28 Thread Dave Airlie
This enables the ARB_gpu_shader5 vertex streams on softpipe.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/softpipe/sp_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/softpipe/sp_screen.c 
b/src/gallium/drivers/softpipe/sp_screen.c
index 438557e146a..d2c31b8935d 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -122,7 +122,7 @@ softpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
   return 1024;
case PIPE_CAP_MAX_VERTEX_STREAMS:
-  return 1;
+  return PIPE_MAX_VERTEX_STREAMS;
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
   return 2048;
case PIPE_CAP_PRIMITIVE_RESTART:
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] softpipe/draw: add support for vertex streams

2019-03-28 Thread Dave Airlie
This adds support for an ARB_gpu_shader5 feature, multiple
vertex streams. I initially wrote this a few years ago, and
decided to revisit it since virglrenderer is using softpipe
in its CI and it might be nice to get closer to GLES3.1.

This doesn't give us ARB_gpu_shader5 yet, but I think
it might be worth enabling it, as nearly everything that is
left is multi-sampling related, and we don't support that properly
anyways.

Dave.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/5] draw: add stream member to stats callback

2019-03-28 Thread Dave Airlie
This just adds space for the member to the callback, doesn't
change anything else.

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/draw/draw_pt_so_emit.c | 2 +-
 src/gallium/auxiliary/draw/draw_vbuf.h   | 1 +
 src/gallium/drivers/llvmpipe/lp_setup_vbuf.c | 2 +-
 src/gallium/drivers/softpipe/sp_prim_vbuf.c  | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c 
b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index 261bd3467f9..829543cf615 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -296,7 +296,7 @@ void draw_pt_so_emit( struct pt_so_emit *emit,
   }
}
 
-   render->set_stream_output_info(render,
+   render->set_stream_output_info(render, 0,
   emit->emitted_primitives,
   emit->generated_primitives);
 }
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h 
b/src/gallium/auxiliary/draw/draw_vbuf.h
index 8faccda556c..6e737ae5b75 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -125,6 +125,7 @@ struct vbuf_render {
 * Called after writing data to the stream out buffers
 */
void (*set_stream_output_info)( struct vbuf_render *vbufr,
+   unsigned stream,
unsigned primitive_count,
unsigned primitive_generated );
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c 
b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
index 6675b20168b..17ac61cddb2 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -544,7 +544,7 @@ lp_setup_vbuf_destroy(struct vbuf_render *vbr)
  * increase too should call this from outside streamout code.
  */
 static void
-lp_setup_so_info(struct vbuf_render *vbr, uint primitives, uint prim_generated)
+lp_setup_so_info(struct vbuf_render *vbr, uint stream, uint primitives, uint 
prim_generated)
 {
struct lp_setup_context *setup = lp_setup_context(vbr);
struct llvmpipe_context *lp = llvmpipe_context(setup->pipe);
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c 
b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 1ce04a2f11d..783adf4f448 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -597,7 +597,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, 
uint nr)
  * increase too should call this from outside streamout code.
  */
 static void
-sp_vbuf_so_info(struct vbuf_render *vbr, uint primitives, uint prim_generated)
+sp_vbuf_so_info(struct vbuf_render *vbr, uint stream, uint primitives, uint 
prim_generated)
 {
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/5] draw: add support to tgsi paths for geometry streams. (v2)

2019-03-28 Thread Dave Airlie
This hooks up the geometry shader processing to the TGSI
support added in the previous commits.

It doesn't change the llvm interface other than to
keep things building.

v2: fix some regressions caused by primitiveoffsets

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/draw/draw_gs.c  | 198 +++---
 src/gallium/auxiliary/draw/draw_gs.h  |  21 +-
 src/gallium/auxiliary/draw/draw_pt.h  |   1 +
 .../draw/draw_pt_fetch_shade_pipeline.c   |  16 +-
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c  |  14 +-
 src/gallium/auxiliary/draw/draw_pt_so_emit.c  |  64 +++---
 src/gallium/drivers/softpipe/sp_query.c   |   4 +-
 7 files changed, 194 insertions(+), 124 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index 7664ecf8f3a..1d6040993c6 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -75,12 +75,12 @@ draw_gs_should_flush(struct draw_geometry_shader *shader)
 /*#define DEBUG_OUTPUTS 1*/
 static void
 tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
+  unsigned stream,
   unsigned num_primitives,
   float (**p_output)[4])
 {
struct tgsi_exec_machine *machine = shader->machine;
unsigned prim_idx, j, slot;
-   unsigned current_idx = 0;
float (*output)[4];
 
output = *p_output;
@@ -89,14 +89,16 @@ tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
 */
 
for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
-  unsigned num_verts_per_prim = machine->Primitives[prim_idx][0];
-  shader->primitive_lengths[prim_idx + shader->emitted_primitives] =
- machine->Primitives[prim_idx][0];
-  shader->emitted_vertices += num_verts_per_prim;
-  for (j = 0; j < num_verts_per_prim; j++, current_idx++) {
- int idx = current_idx * shader->info.num_outputs;
+  unsigned num_verts_per_prim = machine->Primitives[stream][prim_idx];
+  unsigned prim_offset = machine->PrimitiveOffsets[stream][prim_idx];
+  shader->stream[stream].primitive_lengths[prim_idx + 
shader->stream[stream].emitted_primitives] =
+ machine->Primitives[stream][prim_idx];
+  shader->stream[stream].emitted_vertices += num_verts_per_prim;
+
+  for (j = 0; j < num_verts_per_prim; j++) {
+ int idx = prim_offset + j * shader->info.num_outputs;
 #ifdef DEBUG_OUTPUTS
- debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs);
+ debug_printf("%d/%d) Output vert:\n", stream, idx / 
shader->info.num_outputs);
 #endif
  for (slot = 0; slot < shader->info.num_outputs; slot++) {
 output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0];
@@ -115,7 +117,7 @@ tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
   }
}
*p_output = output;
-   shader->emitted_primitives += num_primitives;
+   shader->stream[stream].emitted_primitives += num_primitives;
 }
 
 /*#define DEBUG_INPUTS 1*/
@@ -201,16 +203,40 @@ static void tgsi_gs_prepare(struct draw_geometry_shader 
*shader,
}
 }
 
-static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
-unsigned input_primitives)
+static void tgsi_gs_run(struct draw_geometry_shader *shader,
+unsigned input_primitives,
+unsigned *out_prims)
 {
struct tgsi_exec_machine *machine = shader->machine;
+   int i;
 
/* run interpreter */
tgsi_exec_machine_run(machine, 0);
 
-   return
-  
machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
+   for (i = 0; i < 4; i++) {
+  int prim_i;
+  int prim_c;
+  switch (i) {
+  case 0:
+ prim_i = TGSI_EXEC_TEMP_PRIMITIVE_I;
+ prim_c = TGSI_EXEC_TEMP_PRIMITIVE_C;
+ break;
+  case 1:
+ prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S1_I;
+ prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S1_C;
+ break;
+  case 2:
+ prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S2_I;
+ prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S2_C;
+ break;
+  case 3:
+ prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S3_I;
+ prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S3_C;
+ break;
+  };
+
+  out_prims[i] = machine->Temps[prim_i].xyzw[prim_c].u[0];
+   }
 }
 
 #ifdef HAVE_LLVM
@@ -287,6 +313,7 @@ llvm_fetch_gs_input(struct draw_geometry_shader *shader,
 
 static void
 llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
+  unsigned stream,
   unsigned num_primitives,
   float (**p_output)[4])
 {
@@ -307,7 +334,7 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
   total_verts += shader->llvm_emitted_vertices[i];
}
 
-   output_ptr += shader->emitted_vertices * shader->vertex_size;
+   output_ptr += shader->stream[0].emitted_vertices * shader->vertex_size;
for (i = 0; i < 

[Mesa-dev] [PATCH 3/5] softpipe: add support for indexed queries.

2019-03-28 Thread Dave Airlie
We need indexed queries to retrieve the geom shader info.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/softpipe/sp_context.h   |  2 +-
 src/gallium/drivers/softpipe/sp_prim_vbuf.c |  4 ++--
 src/gallium/drivers/softpipe/sp_query.c | 23 +++--
 src/gallium/include/pipe/p_state.h  |  1 +
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_context.h 
b/src/gallium/drivers/softpipe/sp_context.h
index 7c42403009c..3bde9baa23a 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -94,7 +94,7 @@ struct softpipe_context {
struct draw_so_target *so_targets[PIPE_MAX_SO_BUFFERS];
unsigned num_so_targets;

-   struct pipe_query_data_so_statistics so_stats;
+   struct pipe_query_data_so_statistics so_stats[PIPE_MAX_VERTEX_STREAMS];
 
struct pipe_query_data_pipeline_statistics pipeline_statistics;
unsigned active_statistics_queries;
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c 
b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 783adf4f448..12734314bf0 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -602,8 +602,8 @@ sp_vbuf_so_info(struct vbuf_render *vbr, uint stream, uint 
primitives, uint prim
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
 
-   softpipe->so_stats.num_primitives_written += primitives;
-   softpipe->so_stats.primitives_storage_needed += prim_generated;
+   softpipe->so_stats[stream].num_primitives_written += primitives;
+   softpipe->so_stats[stream].primitives_storage_needed += prim_generated;
 }
 
 static void
diff --git a/src/gallium/drivers/softpipe/sp_query.c 
b/src/gallium/drivers/softpipe/sp_query.c
index 5c9afe6fe47..7187c562068 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -39,6 +39,7 @@
 
 struct softpipe_query {
unsigned type;
+   unsigned index;
uint64_t start;
uint64_t end;
struct pipe_query_data_so_statistics so;
@@ -73,7 +74,7 @@ softpipe_create_query(struct pipe_context *pipe,
   type == PIPE_QUERY_TIMESTAMP_DISJOINT);
sq = CALLOC_STRUCT( softpipe_query );
sq->type = type;
-
+   sq->index = index;
return (struct pipe_query *)sq;
 }
 
@@ -101,8 +102,8 @@ softpipe_begin_query(struct pipe_context *pipe, struct 
pipe_query *q)
   sq->start = os_time_get_nano();
   break;
case PIPE_QUERY_SO_STATISTICS:
-  sq->so.num_primitives_written = 
softpipe->so_stats.num_primitives_written;
-  sq->so.primitives_storage_needed = 
softpipe->so_stats.primitives_storage_needed;
+  sq->so.num_primitives_written = 
softpipe->so_stats[0].num_primitives_written;
+  sq->so.primitives_storage_needed = 
softpipe->so_stats[0].primitives_storage_needed;
   break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
@@ -110,10 +111,10 @@ softpipe_begin_query(struct pipe_context *pipe, struct 
pipe_query *q)
   sq->so.primitives_storage_needed = 
softpipe->so_stats.primitives_storage_needed;
   break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
-  sq->so.num_primitives_written = 
softpipe->so_stats.num_primitives_written;
+  sq->so.num_primitives_written = 
softpipe->so_stats[sq->index].num_primitives_written;
   break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
-  sq->so.primitives_storage_needed = 
softpipe->so_stats.primitives_storage_needed;
+  sq->so.primitives_storage_needed = 
softpipe->so_stats[sq->index].primitives_storage_needed;
   break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_GPU_FINISHED:
@@ -161,24 +162,24 @@ softpipe_end_query(struct pipe_context *pipe, struct 
pipe_query *q)
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   sq->so.num_primitives_written =
- softpipe->so_stats.num_primitives_written - 
sq->so.num_primitives_written;
+ softpipe->so_stats[0].num_primitives_written - 
sq->so.num_primitives_written;
   sq->so.primitives_storage_needed =
- softpipe->so_stats.primitives_storage_needed - 
sq->so.primitives_storage_needed;
+ softpipe->so_stats[0].primitives_storage_needed - 
sq->so.primitives_storage_needed;
   sq->end = sq->so.primitives_storage_needed > 
sq->so.num_primitives_written;
   break;
case PIPE_QUERY_SO_STATISTICS:
   sq->so.num_primitives_written =
- softpipe->so_stats.num_primitives_written - 
sq->so.num_primitives_written;
+ softpipe->so_stats[sq->index].num_primitives_written - 
sq->so.num_primitives_written;
   sq->so.primitives_storage_needed =
- softpipe->so_stats.primitives_storage_needed - 
sq->so.primitives_storage_needed;
+ softpipe->so_stats[sq->index].primitives_storage_needed - 
sq->so.primitives_storage_needed;
   break;
case 

Re: [Mesa-dev] [PATCH 2/4] ac: add ac_build_frex_exp() helper ans 16-bit/32-bit support

2019-03-28 Thread Timothy Arceri

This change broke radeonsi for:

tests/spec/arb_gpu_shader_fp64/execution/built-in-functions/fs-frexp-dvec4.shader_test

LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.frexp.exp

On 23/3/19 12:52 am, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/amd/common/ac_llvm_build.c  | 24 
  src/amd/common/ac_llvm_build.h  |  4 
  src/amd/common/ac_nir_to_llvm.c |  8 +---
  3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 4fd1d14b78f..5572b244720 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -3927,6 +3927,30 @@ ac_build_shuffle(struct ac_llvm_context *ctx, 
LLVMValueRef src, LLVMValueRef ind
  AC_FUNC_ATTR_CONVERGENT);
  }
  
+LLVMValueRef

+ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
+  unsigned bitsize)
+{
+   LLVMTypeRef type;
+   char *intr;
+
+   if (bitsize == 16) {
+   intr = "llvm.amdgcn.frexp.exp.i16.f16";
+   type = ctx->i16;
+   } else if (bitsize == 32) {
+   intr = "llvm.amdgcn.frexp.exp.i32.f32";
+   type = ctx->i32;
+   } else {
+   intr = "llvm.amdgcn.frexp.exp.i32.f64";
+   type = ctx->i64;
+   }
+
+   LLVMValueRef params[] = {
+   src0,
+   };
+   return ac_build_intrinsic(ctx, intr, type, params, 1,
+ AC_FUNC_ATTR_READNONE);
+}
  LLVMValueRef
  ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize)
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index db20b39d443..c3277fd2d13 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -677,6 +677,10 @@ ac_build_quad_swizzle(struct ac_llvm_context *ctx, 
LLVMValueRef src,
  LLVMValueRef
  ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
index);
  
+LLVMValueRef

+ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
+  unsigned bitsize);
+
  LLVMValueRef
  ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 307a71c00ab..9331fd14b7d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -803,9 +803,11 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
break;
case nir_op_frexp_exp:
src[0] = ac_to_float(>ac, src[0]);
-   result = ac_build_intrinsic(>ac, 
"llvm.amdgcn.frexp.exp.i32.f64",
-   ctx->ac.i32, src, 1, 
AC_FUNC_ATTR_READNONE);
-
+   result = ac_build_frexp_exp(>ac, src[0],
+   ac_get_elem_bits(>ac, 
LLVMTypeOf(src[0])));
+   if (ac_get_elem_bits(>ac, LLVMTypeOf(src[0])) == 16)
+   result = LLVMBuildSExt(ctx->ac.builder, result,
+  ctx->ac.i32, "");
break;
case nir_op_frexp_sig:
src[0] = ac_to_float(>ac, src[0]);

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] ddebug: add compute functions to help hang detection

2019-03-28 Thread Dave Airlie
From: Dave Airlie 

---
 .../auxiliary/driver_ddebug/dd_context.c  | 23 +--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/driver_ddebug/dd_context.c 
b/src/gallium/auxiliary/driver_ddebug/dd_context.c
index 15efeccf879..4ea0b16de6f 100644
--- a/src/gallium/auxiliary/driver_ddebug/dd_context.c
+++ b/src/gallium/auxiliary/driver_ddebug/dd_context.c
@@ -647,6 +647,25 @@ dd_context_resource_commit(struct pipe_context *_pipe,
return pipe->resource_commit(pipe, resource, level, box, commit);
 }
 
+static void
+dd_context_set_compute_resources(struct pipe_context *_pipe,
+unsigned start, unsigned count,
+struct pipe_surface **resources)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+   return pipe->set_compute_resources(pipe, start, count, resources);
+}
+
+static void
+dd_context_set_global_binding(struct pipe_context *_pipe,
+ unsigned first, unsigned count,
+ struct pipe_resource **resources,
+ uint32_t **handles)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+   return pipe->set_global_binding(pipe, first, count, resources, handles);
+}
+
 static void
 dd_context_get_sample_position(struct pipe_context *_pipe,
unsigned sample_count, unsigned sample_index,
@@ -855,10 +874,10 @@ dd_context_create(struct dd_screen *dscreen, struct 
pipe_context *pipe)
CTX_INIT(texture_barrier);
CTX_INIT(memory_barrier);
CTX_INIT(resource_commit);
+   CTX_INIT(set_compute_resources);
+   CTX_INIT(set_global_binding);
/* create_video_codec */
/* create_video_buffer */
-   /* set_compute_resources */
-   /* set_global_binding */
CTX_INIT(get_sample_position);
CTX_INIT(invalidate_resource);
CTX_INIT(get_device_reset_status);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 9/9] panfrost: Implement missing texture formats

2019-03-28 Thread Alyssa Rosenzweig
 - Implements RGB565/RGBA5551 formats
 - Don't advertise support for flipped RGBA5551 and ETC

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_format.c | 12 
 src/gallium/drivers/panfrost/pan_screen.c |  6 +-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pan_format.c 
b/src/gallium/drivers/panfrost/pan_format.c
index cac8a3d0859..cb1c71fca29 100644
--- a/src/gallium/drivers/panfrost/pan_format.c
+++ b/src/gallium/drivers/panfrost/pan_format.c
@@ -182,6 +182,18 @@ panfrost_find_format(const struct util_format_description 
*desc)
 case PIPE_FORMAT_Z32_UNORM:
 return MALI_Z32_UNORM;
 
+case PIPE_FORMAT_B5G6R5_UNORM:
+return MALI_RGB565;
+
+case PIPE_FORMAT_B5G5R5A1_UNORM:
+return MALI_RGB5_A1_UNORM;
+
+case PIPE_FORMAT_A1B5G5R5_UNORM:
+case PIPE_FORMAT_X1B5G5R5_UNORM:
+/* Not supported - this is backwards from OpenGL! */
+assert(0);
+break;
+
 default:
 /* Fallthrough to default */
 break;
diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
b/src/gallium/drivers/panfrost/pan_screen.c
index 45569d59535..682eb37f5c7 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -440,6 +440,9 @@ panfrost_is_format_supported( struct pipe_screen *screen,
 if (format == PIPE_FORMAT_Z24X8_UNORM || format == 
PIPE_FORMAT_X8Z24_UNORM)
 return FALSE;
 
+if (format == PIPE_FORMAT_A1B5G5R5_UNORM || format == 
PIPE_FORMAT_X1B5G5R5_UNORM)
+return FALSE;
+
 if (bind & PIPE_BIND_RENDER_TARGET) {
 /* We don't support rendering into anything but RGBA8 yet. We
  * need more formats for spec compliance, but for now, honesty
@@ -467,7 +470,8 @@ panfrost_is_format_supported( struct pipe_screen *screen,
 }
 
 if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC ||
-format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
+format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC ||
+format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
 /* Compressed formats not yet hooked up. */
 return FALSE;
 }
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/9] panfrost: Include all cubemap faces in bitmap list

2019-03-28 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 253328f3694..b4a26f71dd3 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1020,9 +1020,15 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 struct panfrost_resource *rsrc = (struct 
panfrost_resource *) tex_rsrc;
 
 /* Inject the address in. */
-for (int l = 0; l <= tex_rsrc->last_level; 
++l) {
-
ctx->sampler_views[t][i]->hw.swizzled_bitmaps[l] =
-rsrc->bo->gpu + 
rsrc->bo->slices[l].offset;
+for (int f = 0; f < tex_rsrc->array_size; ++f) 
{
+for (int l = 0; l <= 
tex_rsrc->last_level; ++l) {
+unsigned idx = f * 
(tex_rsrc->last_level + 1) + l;
+
+
ctx->sampler_views[t][i]->hw.swizzled_bitmaps[idx] =
+rsrc->bo->gpu +
+
rsrc->bo->slices[l].offset +
+f * 
rsrc->bo->cubemap_stride;
+}
 }
 
 trampolines[i] = 
panfrost_upload_transient(ctx, >sampler_views[t][i]->hw, sizeof(struct 
mali_texture_descriptor));
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 7/9] panfrost: Implement command stream for linear cubemaps

2019-03-28 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c  | 12 +++-
 src/gallium/drivers/panfrost/pan_resource.c |  2 +-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index b4a26f71dd3..fcd5f538938 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1019,10 +1019,12 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 struct pipe_resource *tex_rsrc = 
ctx->sampler_views[t][i]->base.texture;
 struct panfrost_resource *rsrc = (struct 
panfrost_resource *) tex_rsrc;
 
-/* Inject the address in. */
-for (int f = 0; f < tex_rsrc->array_size; ++f) 
{
-for (int l = 0; l <= 
tex_rsrc->last_level; ++l) {
-unsigned idx = f * 
(tex_rsrc->last_level + 1) + l;
+/* Inject the addresses in, interleaving cube
+ * faces and mip levels appropriately. */
+
+for (int l = 0; l <= tex_rsrc->last_level; 
++l) {
+for (int f = 0; f < 
tex_rsrc->array_size; ++f) {
+unsigned idx = (l * 
tex_rsrc->array_size) + f;
 
 
ctx->sampler_views[t][i]->hw.swizzled_bitmaps[idx] =
 rsrc->bo->gpu +
@@ -1957,7 +1959,7 @@ panfrost_create_sampler_view(
 .format = format,
 
 .usage1 = 0x0,
-.is_not_cubemap = 1,
+.is_not_cubemap = texture->target != PIPE_TEXTURE_CUBE,
 
 .usage2 = usage2_layout
 },
diff --git a/src/gallium/drivers/panfrost/pan_resource.c 
b/src/gallium/drivers/panfrost/pan_resource.c
index a1285f21541..e217b358ecf 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -391,7 +391,7 @@ panfrost_transfer_map(struct pipe_context *pctx,
 transfer->base.usage = usage;
 transfer->base.box = *box;
 transfer->base.stride = bo->slices[level].stride;
-transfer->base.layer_stride = bytes_per_pixel * resource->width0; /* 
TODO: Cubemaps */
+transfer->base.layer_stride = bo->cubemap_stride;
 
 pipe_resource_reference(>base.resource, resource);
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 8/9] panfrost: Extend tiling for cubemaps

2019-03-28 Thread Alyssa Rosenzweig
transfer_unmap now tiles for any tiled resource, not just TEXTURE_2D,
which should fix more than just cubemaps!

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_resource.c | 28 ++---
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_resource.c 
b/src/gallium/drivers/panfrost/pan_resource.c
index e217b358ecf..3d93fd0dc4d 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -433,8 +433,6 @@ panfrost_tile_texture(struct panfrost_screen *screen, 
struct panfrost_resource *
 
 unsigned level = trans->base.level;
 
-assert(!trans->base.box.z);
-
 panfrost_texture_swizzle(
 trans->base.box.x,
 trans->base.box.y,
@@ -443,7 +441,10 @@ panfrost_tile_texture(struct panfrost_screen *screen, 
struct panfrost_resource *
 util_format_get_blocksize(rsrc->base.format),
 bo->slices[level].stride,
 trans->map,
-bo->cpu + bo->slices[level].offset);
+bo->cpu
++ bo->slices[level].offset
++ bo->cubemap_stride * trans->base.box.z
+);
 }
 
 static void
@@ -454,17 +455,16 @@ panfrost_unmap_bo(struct panfrost_context *ctx,
struct panfrost_bo *bo = (struct panfrost_bo 
*)pan_resource(transfer->resource)->bo;
 
 if (transfer->usage & PIPE_TRANSFER_WRITE) {
-if (transfer->resource->target == PIPE_TEXTURE_2D) {
-struct panfrost_resource *prsrc = (struct 
panfrost_resource *) transfer->resource;
-
-/* Gallium thinks writeback happens here; instead, 
this is our cue to tile */
-if (bo->layout == PAN_AFBC) {
-DBG("Warning: writes to afbc surface can't 
possibly work out well for you...\n");
-} else if (bo->layout == PAN_TILED) {
-struct pipe_context *gallium = (struct 
pipe_context *) ctx;
-struct panfrost_screen *screen = 
pan_screen(gallium->screen);
-panfrost_tile_texture(screen, prsrc, trans);
-}
+struct panfrost_resource *prsrc = (struct panfrost_resource *) 
transfer->resource;
+
+/* Gallium thinks writeback happens here; instead, this is our 
cue to tile */
+if (bo->layout == PAN_AFBC) {
+DBG("Warning: writes to afbc surface can't possibly 
work out well for you...\n");
+} else if (bo->layout == PAN_TILED) {
+struct pipe_context *gallium = (struct pipe_context *) 
ctx;
+struct panfrost_screen *screen = 
pan_screen(gallium->screen);
+assert(transfer->box.depth == 1);
+panfrost_tile_texture(screen, prsrc, trans);
 }
 }
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/9] Implement cubemaps

2019-03-28 Thread Alyssa Rosenzweig
This series implements support for decoding cubemaps in the command
streams and disassembling cubemap instructions in shaders. It then
implements support for generating these respective cube maps in the
command stream and emitting the corresponding instructions in the
shader.

Additionally, the last patch in the series implements some simple
support for the remaining formats required in ES2.

After this series, we are at 36/36 passing in:

dEQP-GLES2.functional.texture.format.*

Alyssa Rosenzweig (9):
  panfrost/midgard: Disassemble `cube` texture op
  panfrost/midgard: Add L/S op for writing cubemap coordinates
  panfrost: Preliminary work for cubemaps
  panfrost/decode: Decode all cubemap faces
  panfrost: Include all cubemap faces in bitmap list
  panfrost/midgard: Emit cubemap coordinates
  panfrost: Implement command stream for linear cubemaps
  panfrost: Extend tiling for cubemaps
  panfrost: Implement missing texture formats

 .../drivers/panfrost/midgard/disassemble.c|  1 +
 .../drivers/panfrost/midgard/helpers.h|  7 +++-
 .../drivers/panfrost/midgard/midgard.h|  9 +
 .../panfrost/midgard/midgard_compile.c| 30 +--
 src/gallium/drivers/panfrost/pan_context.c| 19 ++
 src/gallium/drivers/panfrost/pan_format.c | 12 ++
 src/gallium/drivers/panfrost/pan_resource.c   | 38 ++-
 src/gallium/drivers/panfrost/pan_resource.h   |  5 ++-
 src/gallium/drivers/panfrost/pan_screen.c |  6 ++-
 .../drivers/panfrost/pandecode/decode.c   |  8 +++-
 10 files changed, 103 insertions(+), 32 deletions(-)

-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/9] panfrost: Preliminary work for cubemaps

2019-03-28 Thread Alyssa Rosenzweig
Again, not yet functional, but this sets up the memory management for
cube maps.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c  | 3 ---
 src/gallium/drivers/panfrost/pan_resource.c | 8 ++--
 src/gallium/drivers/panfrost/pan_resource.h | 5 -
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 5139a33d0fc..253328f3694 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1906,9 +1906,6 @@ panfrost_create_sampler_view(
  * (data) itself. So, we serialise the descriptor here and cache it for
  * later. */
 
-/* TODO: Other types of textures */
-assert(template->target == PIPE_TEXTURE_2D);
-
 /* Make sure it's something with which we're familiar */
 assert(bytes_per_pixel >= 1 && bytes_per_pixel <= 4);
 
diff --git a/src/gallium/drivers/panfrost/pan_resource.c 
b/src/gallium/drivers/panfrost/pan_resource.c
index 9e663e51422..a1285f21541 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -218,7 +218,10 @@ panfrost_setup_slices(const struct pipe_resource *tmpl, 
struct panfrost_bo *bo)
 height = u_minify(height, 1);
 }
 
-bo->size = ALIGN(offset, 4096);
+assert(tmpl->array_size);
+
+bo->cubemap_stride = ALIGN(offset, 64);
+bo->size = ALIGN(bo->cubemap_stride * tmpl->array_size, 4096);
 }
 
 static struct panfrost_bo *
@@ -286,6 +289,7 @@ panfrost_resource_create(struct pipe_screen *screen,
 case PIPE_TEXTURE_1D:
 case PIPE_TEXTURE_2D:
 case PIPE_TEXTURE_3D:
+case PIPE_TEXTURE_CUBE:
 case PIPE_TEXTURE_RECT:
 break;
 default:
@@ -388,7 +392,6 @@ panfrost_transfer_map(struct pipe_context *pctx,
 transfer->base.box = *box;
 transfer->base.stride = bo->slices[level].stride;
 transfer->base.layer_stride = bytes_per_pixel * resource->width0; /* 
TODO: Cubemaps */
-assert(!transfer->base.box.z);
 
 pipe_resource_reference(&transfer->base.resource, resource);
 
@@ -417,6 +420,7 @@ panfrost_transfer_map(struct pipe_context *pctx,
 } else {
 return bo->cpu
 + bo->slices[level].offset
++ transfer->base.box.z * bo->cubemap_stride
 + transfer->base.box.y * bo->slices[level].stride
 + transfer->base.box.x * bytes_per_pixel;
 }
diff --git a/src/gallium/drivers/panfrost/pan_resource.h 
b/src/gallium/drivers/panfrost/pan_resource.h
index 3e076b873f7..a1315ab1b43 100644
--- a/src/gallium/drivers/panfrost/pan_resource.h
+++ b/src/gallium/drivers/panfrost/pan_resource.h
@@ -53,9 +53,12 @@ struct panfrost_bo {
 /* GPU address for the object */
 mali_ptr gpu;
 
-/* Size of the entire tree */
+/* Size of all entire trees */
 size_t size;
 
+/* Distance from tree to tree */
+unsigned cubemap_stride;
+
 /* Set if this bo was imported rather than allocated */
 bool imported;
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/9] panfrost/midgard: Add L/S op for writing cubemap coordinates

2019-03-28 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/midgard/midgard.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/gallium/drivers/panfrost/midgard/midgard.h 
b/src/gallium/drivers/panfrost/midgard/midgard.h
index 59957c1b566..b2849aa0fee 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard.h
@@ -278,6 +278,14 @@ midgard_writeout;
 
 typedef enum {
 midgard_op_ld_st_noop   = 0x03,
+
+/* Unclear why this is on the L/S unit, but (with an address of 0,
+ * appropriate swizzle, magic constant 0x24, and xy mask?) moves fp32 
cube
+ * map coordinates in r27 to its cube map texture coordinate
+ * destination (e.g r29). 0x4 magic for loading from fp16 instead */
+
+midgard_op_store_cubemap_coords = 0x0E,
+
 midgard_op_load_attr_16 = 0x95,
 midgard_op_load_attr_32 = 0x94,
 midgard_op_load_vary_16 = 0x99,
@@ -507,6 +515,7 @@ static char *alu_opcode_names[256] = {
 };
 
 static char *load_store_opcode_names[256] = {
+[midgard_op_store_cubemap_coords] = "st_cubemap_coords",
 [midgard_op_load_attr_16] = "ld_attr_16",
 [midgard_op_load_attr_32] = "ld_attr_32",
 [midgard_op_load_vary_16] = "ld_vary_16",
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/9] panfrost/midgard: Emit cubemap coordinates

2019-03-28 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 .../drivers/panfrost/midgard/helpers.h|  7 -
 .../panfrost/midgard/midgard_compile.c| 30 ---
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h 
b/src/gallium/drivers/panfrost/midgard/helpers.h
index 456c3fb5f1e..530a086e928 100644
--- a/src/gallium/drivers/panfrost/midgard/helpers.h
+++ b/src/gallium/drivers/panfrost/midgard/helpers.h
@@ -24,11 +24,16 @@
 
 /* Some constants and macros not found in the disassembler */
 
-#define OP_IS_STORE(op) (\
+#define OP_IS_STORE_VARY(op) (\
op == midgard_op_store_vary_16 || \
op == midgard_op_store_vary_32 \
)
 
+#define OP_IS_STORE(op) (\
+OP_IS_STORE_VARY(op) || \
+op == midgard_op_store_cubemap_coords \
+   )
+
 /* ALU control words are single bit fields with a lot of space */
 
 #define ALU_ENAB_VEC_MUL  (1 << 17)
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c 
b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index 4640d921b7b..ea8c0153c96 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -284,6 +284,7 @@ M_LOAD(load_uniform_32);
 M_LOAD(load_color_buffer_8);
 //M_STORE(store_vary_16);
 M_STORE(store_vary_32);
+M_STORE(store_cubemap_coords);
 
 static midgard_instruction
 v_alu_br_compact_cond(midgard_jmp_writeout_op op, unsigned tag, signed offset, 
unsigned cond)
@@ -1544,10 +1545,31 @@ emit_tex(compiler_context *ctx, nir_tex_instr *instr)
 int index = nir_src_index(ctx, &instr->src[i].src);
 
 midgard_vector_alu_src alu_src = blank_alu_src;
-alu_src.swizzle = (COMPONENT_Y << 2);
 
-midgard_instruction ins = v_fmov(index, alu_src, 
SSA_FIXED_REGISTER(REGISTER_TEXTURE_BASE + in_reg));
-emit_mir_instruction(ctx, ins);
+int reg = SSA_FIXED_REGISTER(REGISTER_TEXTURE_BASE + 
in_reg);
+
+if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+/* For cubemaps, we need to load coords into
+ * special r27, and then use a special ld/st op
+ * to copy into the texture register */
+
+alu_src.swizzle = SWIZZLE(COMPONENT_X, 
COMPONENT_Y, COMPONENT_Z, COMPONENT_X);
+
+midgard_instruction move = v_fmov(index, 
alu_src, SSA_FIXED_REGISTER(27));
+emit_mir_instruction(ctx, move);
+
+midgard_instruction st = 
m_store_cubemap_coords(reg, 0);
+st.load_store.unknown = 0x24; /* XXX: What is 
this? */
+st.load_store.mask = 0x3; /* xy? */
+st.load_store.swizzle = alu_src.swizzle;
+emit_mir_instruction(ctx, st);
+
+} else {
+alu_src.swizzle = SWIZZLE(COMPONENT_X, 
COMPONENT_Y, COMPONENT_X, COMPONENT_X);
+
+midgard_instruction ins = v_fmov(index, 
alu_src, reg);
+emit_mir_instruction(ctx, ins);
+}
 
 //midgard_pin_output(ctx, index, REGISTER_TEXTURE_BASE 
+ in_reg);
 
@@ -1927,7 +1949,7 @@ allocate_registers(compiler_context *ctx)
 break;
 
 case TAG_LOAD_STORE_4: {
-if (OP_IS_STORE(ins->load_store.op)) {
+if (OP_IS_STORE_VARY(ins->load_store.op)) {
 /* TODO: use ssa_args for store_vary */
 ins->load_store.reg = 0;
 } else {
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/9] panfrost/midgard: Disassemble `cube` texture op

2019-03-28 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/midgard/disassemble.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/panfrost/midgard/disassemble.c 
b/src/gallium/drivers/panfrost/midgard/disassemble.c
index 66d32d942c4..376c8f46956 100644
--- a/src/gallium/drivers/panfrost/midgard/disassemble.c
+++ b/src/gallium/drivers/panfrost/midgard/disassemble.c
@@ -794,6 +794,7 @@ print_texture_format(int format)
 switch (format) {
 DEFINE_CASE(TEXTURE_2D, "2d");
 DEFINE_CASE(TEXTURE_3D, "3d");
+DEFINE_CASE(TEXTURE_CUBE, "cube");
 
 default:
 printf("fmt_%d", format);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/9] panfrost/decode: Decode all cubemap faces

2019-03-28 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pandecode/decode.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index 86fb968e334..966bce6ebd7 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -1463,7 +1463,13 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct 
mali_vertex_tiler_postfix
 pandecode_log(".swizzled_bitmaps = 
{\n");
 pandecode_indent++;
 
-int bitmap_count = 1 + 
t->nr_mipmap_levels + t->unknown3A;
+int bitmap_count = 
MALI_NEGATIVE(t->nr_mipmap_levels);
+
+if (!f.is_not_cubemap) {
+/* Miptree for each face */
+bitmap_count *= 6;
+}
+
 int max_count = 
sizeof(t->swizzled_bitmaps) / sizeof(t->swizzled_bitmaps[0]);
 
 if (bitmap_count > max_count) {
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/10] panfrost: Fix vertex buffer corruption

2019-03-28 Thread Alyssa Rosenzweig
Fixes crash in dEQP-GLES2.functional.buffer.*

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index d242c3f90ce..5139a33d0fc 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -772,12 +772,14 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx)
  * rsrc->gpu. However, attribute buffers must be 64 aligned. If
  * it is not, for now we have to duplicate the buffer. */
 
-mali_ptr effective_address = (rsrc->bo->gpu + 
buf->buffer_offset);
+mali_ptr effective_address = rsrc ? (rsrc->bo->gpu + 
buf->buffer_offset) : 0;
 
-if (effective_address & 0x3F) {
-attrs[i].elements = panfrost_upload_transient(ctx, 
rsrc->bo->cpu + buf->buffer_offset, attrs[i].size) | 1;
+if (effective_address) {
+attrs[i].elements = panfrost_upload_transient(ctx, 
rsrc->bo->cpu + buf->buffer_offset, attrs[i].size) | MALI_ATTR_LINEAR;
+} else if (effective_address) {
+attrs[i].elements = effective_address | 
MALI_ATTR_LINEAR;
 } else {
-attrs[i].elements = effective_address | 1;
+/* Leave unset? */
 }
 }
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [AppVeyor] mesa master #10582 failed

2019-03-28 Thread AppVeyor



Build mesa 10582 failed


Commit 7832fb7889 by Ian Romanick on 5/23/2018 1:56 AM:

intel/compiler: Use partial redundancy elimination for compares\n\nAlmost all of the hurt shaders are repeated instances of the same shader\nin synmark's compilation speed tests.\n\nshader-db results:\n\nAll Gen6+ platforms had similar results. (Skylake shown)\ntotal instructions in shared programs: 15256840 -> 15256389 (<.01%)\ninstructions in affected programs: 54137 -> 53686 (-0.83%)\nhelped: 288\nHURT: 0\nhelped stats (abs) min: 1 max: 15 x̄: 1.57 x̃: 1\nhelped stats (rel) min: 0.06% max: 26.67% x̄: 1.99% x̃: 0.74%\n95% mean confidence interval for instructions value: -1.76 -1.38\n95% mean confidence interval for instructions %-change: -2.47% -1.50%\nInstructions are helped.\n\ntotal cycles in shared programs: 372286583 -> 372283851 (<.01%)\ncycles in affected programs: 833829 -> 831097 (-0.33%)\nhelped: 265\nHURT: 16\nhelped stats (abs) min: 2 max: 74 x̄: 11.81 x̃: 4\nhelped stats (rel) min: 0.04% max: 9.07% x̄: 0.99% x̃: 0.35%\nHURT stats (abs)   min: 2 max: 130 x̄: 24.88 x̃: 8\nHURT stats (rel)   min: <.01% max: 12.31% x̄: 1.44% x̃: 0.27%\n95% mean confidence interval for cycles value: -12.30 -7.15\n95% mean confidence interval for cycles %-change: -1.06% -0.64%\nCycles are helped.\n\nIron Lake and GM45 had similar results. 
(GM45 shown)\ntotal instructions in shared programs: 5038653 -> 5038495 (<.01%)\ninstructions in affected programs: 13939 -> 13781 (-1.13%)\nhelped: 50\nHURT: 1\nhelped stats (abs) min: 1 max: 15 x̄: 3.18 x̃: 4\nhelped stats (rel) min: 0.33% max: 13.33% x̄: 2.24% x̃: 1.09%\nHURT stats (abs)   min: 1 max: 1 x̄: 1.00 x̃: 1\nHURT stats (rel)   min: 0.83% max: 0.83% x̄: 0.83% x̃: 0.83%\n95% mean confidence interval for instructions value: -3.73 -2.47\n95% mean confidence interval for instructions %-change: -3.16% -1.21%\nInstructions are helped.\n\ntotal cycles in shared programs: 128118922 -> 128118228 (<.01%)\ncycles in affected programs: 134906 -> 134212 (-0.51%)\nhelped: 50\nHURT: 0\nhelped stats (abs) min: 2 max: 60 x̄: 13.88 x̃: 18\nhelped stats (rel) min: 0.06% max: 3.19% x̄: 0.74% x̃: 0.70%\n95% mean confidence interval for cycles value: -16.54 -11.22\n95% mean confidence interval for cycles %-change: -0.95% -0.53%\nCycles are helped.\n\nReviewed-by: Kenneth Graunke 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] gallium/osmesa: Fix the inability to set no context as current.

2019-03-28 Thread zegentzy
https://www.mesa3d.org/submittingpatches.html says just send a link, so:

https://gitlab.freedesktop.org/mesa/mesa/merge_requests/533
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not always initialize HTILE in compressed state

2019-03-28 Thread Bas Nieuwenhuizen
r-b

On Thu, Mar 28, 2019 at 4:00 PM Samuel Pitoiset
 wrote:
>
> Especially when performing a transtion from UNDEFINED->GENERAL,
> the driver shouldn't initialize HTILE metadata in compressed
> state because it doesn't decompress when the src layout is
> GENERAL.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110259
> Fixes: 3a2e93147f7 ("radv: always initialize HTILE when the src layout is 
> UNDEFINED")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index bed899d686e..7ee5a5ca7dc 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4478,8 +4478,14 @@ static void radv_handle_depth_image_transition(struct 
> radv_cmd_buffer *cmd_buffe
> return;
>
> if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
> -   /* TODO: merge with the clear if applicable */
> -   radv_initialize_htile(cmd_buffer, image, range, 0);
> +   uint32_t clear_value = vk_format_is_stencil(image->vk_format) 
> ? 0xf30f : 0xfffc000f;
> +
> +   if (radv_layout_is_htile_compressed(image, dst_layout,
> +   dst_queue_mask)) {
> +   clear_value = 0;
> +   }
> +
> +   radv_initialize_htile(cmd_buffer, image, range, clear_value);
> } else if (!radv_layout_is_htile_compressed(image, src_layout, 
> src_queue_mask) &&
>radv_layout_is_htile_compressed(image, dst_layout, 
> dst_queue_mask)) {
> uint32_t clear_value = vk_format_is_stencil(image->vk_format) 
> ? 0xf30f : 0xfffc000f;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Samuel Pitoiset


On 3/28/19 4:45 PM, Jason Ekstrand wrote:
On Thu, Mar 28, 2019 at 10:27 AM Samuel Pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:


Calling it after the first call of radv_optimize_nir() is worse.


Ugh. Do we know why?  I mean, it does emit the offset calculations 
in a slightly different order but I wouldn't expect it to hurt this 
bad. :-/

I don't know exactly what the problem is, I will investigate soon.


27747 shaders in 14347 tests
Totals:
SGPRS: 1248039 -> 1248031 (-0.00 %)
VGPRS: 868360 -> 868772 (0.05 %)
Spilled SGPRs: 24108 -> 24134 (0.11 %)
Spilled VGPRs: 122 -> 122 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 128 -> 128 (0.00 %) dwords per thread
Code Size: 46282020 -> 46336692 (0.12 %) bytes
LDS: 770 -> 770 (0.00 %) blocks
Max Waves: 199898 -> 199871 (-0.01 %)
Wait states: 0 -> 0 (0.00 %)

Totals from affected shaders:
SGPRS: 52848 -> 52840 (-0.02 %)
VGPRS: 47472 -> 47884 (0.87 %)
Spilled SGPRs: 5079 -> 5105 (0.51 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 4760924 -> 4815596 (1.15 %) bytes
LDS: 26 -> 26 (0.00 %) blocks
Max Waves: 3084 -> 3057 (-0.88 %)
Wait states: 0 -> 0 (0.00 %)

On 3/28/19 3:08 PM, Jason Ekstrand wrote:

On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset
mailto:samuel.pitoi...@gmail.com>> wrote:

This helps a few compute shaders, mostly for F1 2017.

27670 shaders in 14347 tests
Totals:
SGPRS: 1231173 -> 1231173 (0.00 %)
VGPRS: 866056 -> 865928 (-0.01 %)
Spilled SGPRs: 24201 -> 24201 (0.00 %)
Code Size: 46137040 -> 46144868 (0.02 %) bytes
Max Waves: 232287 -> 232302 (0.01 %)

Totals from affected shaders:
SGPRS: 24624 -> 24624 (0.00 %)
VGPRS: 25960 -> 25832 (-0.49 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Code Size: 2922632 -> 2930460 (0.27 %) bytes
Max Waves: 1216 -> 1231 (1.23 %)

Suggested-by: mailto:ja...@jlekstrand.net>>
Signed-off-by: Samuel Pitoiset mailto:samuel.pitoi...@gmail.com>>
---
 src/amd/common/ac_nir_to_llvm.c | 8 
 src/amd/vulkan/radv_shader.c    | 5 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index b25cc6a0a84..c46d98e6dd9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct
ac_nir_context *ctx,
        case nir_intrinsic_vulkan_resource_reindex:
                result = visit_vulkan_resource_reindex(ctx,
instr);
                break;
+       case nir_intrinsic_load_vulkan_descriptor: {
+               LLVMValueRef values[2] = {
+                       get_src(ctx, instr->src[0]),
+                       ctx->ac.i32_0,
+               };
+               result = ac_build_gather_values(>ac,
values, 2);
+               break;
+       }
        case nir_intrinsic_store_ssbo:
                visit_store_ssbo(ctx, instr);
                break;
diff --git a/src/amd/vulkan/radv_shader.c
b/src/amd/vulkan/radv_shader.c
index 19a807df199..2751302e8b9 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct
radv_device *device,
                        }
                }
                const struct spirv_to_nir_options
spirv_options = {
-  .lower_ubo_ssbo_access_to_offsets = true,
                        .caps = {
.descriptor_array_dynamic_indexing = true,
                                .device_group = true,
@@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct
radv_device *device,
                NIR_PASS_V(nir, nir_lower_system_values);
                NIR_PASS_V(nir,
nir_lower_clip_cull_distance_arrays);
                NIR_PASS_V(nir, nir_lower_frexp);
+
+               NIR_PASS_V(nir, nir_lower_explicit_io,
+                         nir_var_mem_ubo | nir_var_mem_ssbo,
+  nir_address_format_32bit_index_offset);


If you actually want to get SSBO access optimization, you need to
call this *after* your first call to your main optimization loop.
Otherwise, the change is basically just a no-op which just
shuffles around the way address calculations are done a bit.

--Jason

        }

        /* Vulkan uses the separate-shader linking model */
-- 
2.21.0


___
   

Re: [Mesa-dev] [PATCH] radv: skip updating clear/color metadata for conditional rendering

2019-03-28 Thread Samuel Pitoiset


On 3/28/19 12:31 PM, Bas Nieuwenhuizen wrote:

r-b, though technically you may want something older for the fixes tag.


Pushed with a "cc 19.0" tag instead.



On Thu, Mar 28, 2019 at 12:20 PM Samuel Pitoiset
 wrote:

I don't think we should update metadata when conditional rendering
is enabled. For some reason, some CTS tests break only on SI.

This fixes the following CTS on SI:
dEQP-VK.conditional_rendering.draw_clear.clear.depth.*

Fixes: a777c3d7cb0 ("radv: Use correct image view comparison for fast clears.")
Signed-off-by: Samuel Pitoiset 
---
  src/amd/vulkan/radv_cmd_buffer.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index fdf3024147d..bed899d686e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1275,7 +1275,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer 
*cmd_buffer,
 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
 ++reg_count;

-   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0));
+   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 
cmd_buffer->state.predicating));
 radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 S_370_WR_CONFIRM(1) |
 S_370_ENGINE_SEL(V_370_PFP));
@@ -1299,7 +1299,7 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer 
*cmd_buffer,
 uint64_t va = radv_buffer_get_va(image->bo);
 va += image->offset + image->tc_compat_zrange_offset;

-   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 
cmd_buffer->state.predicating));
 radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 S_370_WR_CONFIRM(1) |
 S_370_ENGINE_SEL(V_370_PFP));
@@ -1493,7 +1493,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer 
*cmd_buffer,

 assert(radv_image_has_cmask(image) || radv_image_has_dcc(image));

-   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
+   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 
cmd_buffer->state.predicating));
 radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 S_370_WR_CONFIRM(1) |
 S_370_ENGINE_SEL(V_370_PFP));
--
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeon/vcn/vp9: search the render target from the whole list

2019-03-28 Thread James Zhu
This Patch is  Tested-by: James Zhu 

On 2019-03-28 9:08 a.m., Liu, Leo wrote:
> The number of render targets could be more than max of references,
> so we search the full list of the render pictures for the current
> render target index
>
> https://bugs.freedesktop.org/show_bug.cgi?id=109648
>
> Signed-off-by: Leo Liu 
> Cc: 
> ---
>   src/gallium/drivers/radeon/radeon_vcn_dec.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c 
> b/src/gallium/drivers/radeon/radeon_vcn_dec.c
> index d165c55f835..688cef90103 100644
> --- a/src/gallium/drivers/radeon/radeon_vcn_dec.c
> +++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c
> @@ -491,7 +491,7 @@ static rvcn_dec_message_vp9_t get_vp9_msg(struct 
> radeon_decoder *dec,
>   
>   assert(dec->base.max_references + 1 <= 16);
>   
> - for (i = 0 ; i < dec->base.max_references + 1 ; ++i) {
> + for (i = 0 ; i < 16 ; ++i) {
>   if (dec->render_pic_list[i] && dec->render_pic_list[i] == 
> target) {
>   result.curr_pic_idx =
>   
> (uintptr_t)vl_video_buffer_get_associated_data(target, >base);
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110261] Segmentation fault when using vulkaninfo on Radeon

2019-03-28 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110261

--- Comment #2 from Kenneth Endfinger  ---
I am also running an AMD eGPU over ThunderBolt:

Section "Device"
  Identifier "AMD"
  Driver "amdgpu"
  BusID "PCI:61:0:0"
  Option "AllowEmptyInitialConfiguration"
  Option "AllowExternalGpus"
EndSection

Section "Device"
  Identifier "Intel"
  Driver "intel"
  BusID "PCI:0:2:0"
EndSection

kendfinger@melt ~ $ sudo lspci | grep "VGA"
00:02.0 VGA compatible controller: Intel Corporation UHD Graphics 630 (Mobile)
01:00.0 VGA compatible controller: NVIDIA Corporation GP107M [GeForce GTX 1050
Ti Mobile] (rev a1)
3d:00.0 VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI]
Ellesmere [Radeon RX 470/480/570/570X/580/580X/590] (rev ef)

kendfinger@melt ~ $ xrandr --listproviders 
Providers: number : 2
Provider 0: id: 0xc1 cap: 0xf, Source Output, Sink Output, Source Offload, Sink
Offload crtcs: 6 outputs: 5 associated providers: 1 name:Radeon RX 570 Series @
pci::3d:00.0
Provider 1: id: 0x45 cap: 0xb, Source Output, Sink Output, Sink Offload crtcs:
4 outputs: 2 associated providers: 1 name:Intel

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Jason Ekstrand
On Thu, Mar 28, 2019 at 10:27 AM Samuel Pitoiset 
wrote:

> Calling it after the first call of radv_optimize_nir() is worse.
>

Ugh Do we know why?  I mean, it does emit the offset calculations in a
slightly different order but I wouldn't expect it to hurt this bad. :-/


> 27747 shaders in 14347 tests
> Totals:
> SGPRS: 1248039 -> 1248031 (-0.00 %)
> VGPRS: 868360 -> 868772 (0.05 %)
> Spilled SGPRs: 24108 -> 24134 (0.11 %)
> Spilled VGPRs: 122 -> 122 (0.00 %)
> Private memory VGPRs: 0 -> 0 (0.00 %)
> Scratch size: 128 -> 128 (0.00 %) dwords per thread
> Code Size: 46282020 -> 46336692 (0.12 %) bytes
> LDS: 770 -> 770 (0.00 %) blocks
> Max Waves: 199898 -> 199871 (-0.01 %)
> Wait states: 0 -> 0 (0.00 %)
>
> Totals from affected shaders:
> SGPRS: 52848 -> 52840 (-0.02 %)
> VGPRS: 47472 -> 47884 (0.87 %)
> Spilled SGPRs: 5079 -> 5105 (0.51 %)
> Spilled VGPRs: 0 -> 0 (0.00 %)
> Private memory VGPRs: 0 -> 0 (0.00 %)
> Scratch size: 0 -> 0 (0.00 %) dwords per thread
> Code Size: 4760924 -> 4815596 (1.15 %) bytes
> LDS: 26 -> 26 (0.00 %) blocks
> Max Waves: 3084 -> 3057 (-0.88 %)
> Wait states: 0 -> 0 (0.00 %)
> On 3/28/19 3:08 PM, Jason Ekstrand wrote:
>
> On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset 
> wrote:
>
>> This helps few compute shaders, mostly for F12017.
>>
>> 27670 shaders in 14347 tests
>> Totals:
>> SGPRS: 1231173 -> 1231173 (0.00 %)
>> VGPRS: 866056 -> 865928 (-0.01 %)
>> Spilled SGPRs: 24201 -> 24201 (0.00 %)
>> Code Size: 46137040 -> 46144868 (0.02 %) bytes
>> Max Waves: 232287 -> 232302 (0.01 %)
>>
>> Totals from affected shaders:
>> SGPRS: 24624 -> 24624 (0.00 %)
>> VGPRS: 25960 -> 25832 (-0.49 %)
>> Spilled SGPRs: 0 -> 0 (0.00 %)
>> Code Size: 2922632 -> 2930460 (0.27 %) bytes
>> Max Waves: 1216 -> 1231 (1.23 %)
>>
>> Suggested-by: 
>> Signed-off-by: Samuel Pitoiset 
>> ---
>>  src/amd/common/ac_nir_to_llvm.c | 8 
>>  src/amd/vulkan/radv_shader.c| 5 -
>>  2 files changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>> b/src/amd/common/ac_nir_to_llvm.c
>> index b25cc6a0a84..c46d98e6dd9 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
>> *ctx,
>> case nir_intrinsic_vulkan_resource_reindex:
>> result = visit_vulkan_resource_reindex(ctx, instr);
>> break;
>> +   case nir_intrinsic_load_vulkan_descriptor: {
>> +   LLVMValueRef values[2] = {
>> +   get_src(ctx, instr->src[0]),
>> +   ctx->ac.i32_0,
>> +   };
>> +   result = ac_build_gather_values(>ac, values, 2);
>> +   break;
>> +   }
>> case nir_intrinsic_store_ssbo:
>> visit_store_ssbo(ctx, instr);
>> break;
>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>> index 19a807df199..2751302e8b9 100644
>> --- a/src/amd/vulkan/radv_shader.c
>> +++ b/src/amd/vulkan/radv_shader.c
>> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
>> }
>> }
>> const struct spirv_to_nir_options spirv_options = {
>> -   .lower_ubo_ssbo_access_to_offsets = true,
>> .caps = {
>> .descriptor_array_dynamic_indexing = true,
>> .device_group = true,
>> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
>> *device,
>> NIR_PASS_V(nir, nir_lower_system_values);
>> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
>> NIR_PASS_V(nir, nir_lower_frexp);
>> +
>> +   NIR_PASS_V(nir, nir_lower_explicit_io,
>> + nir_var_mem_ubo | nir_var_mem_ssbo,
>> + nir_address_format_32bit_index_offset);
>>
>
> If you actually want to get SSBO access optimization, you need to call
> this *after* your first call to your main optimization loop.  Otherwise,
> the change is basically just a no-op which just shuffles around the way
> address calculations are done a bit.
>
> --Jason
>
>
>> }
>>
>> /* Vulkan uses the separate-shader linking model */
>> --
>> 2.21.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110261] Segmentation fault when using vulkaninfo on Radeon

2019-03-28 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110261

Samuel Pitoiset  changed:

   What|Removed |Added

 CC||airl...@freedesktop.org,
   ||chadvers...@chromium.org,
   ||dan...@fooishbar.org,
   ||ja...@jlekstrand.net
 QA Contact|mesa-dev@lists.freedesktop. |
   |org |
  Component|Drivers/Vulkan/radeon   |Drivers/Vulkan/Common

--- Comment #1 from Samuel Pitoiset  ---
I can't reproduce but it crashes inside the WSI code path.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Samuel Pitoiset

Calling it after the first call of radv_optimize_nir() is worse.

27747 shaders in 14347 tests
Totals:
SGPRS: 1248039 -> 1248031 (-0.00 %)
VGPRS: 868360 -> 868772 (0.05 %)
Spilled SGPRs: 24108 -> 24134 (0.11 %)
Spilled VGPRs: 122 -> 122 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 128 -> 128 (0.00 %) dwords per thread
Code Size: 46282020 -> 46336692 (0.12 %) bytes
LDS: 770 -> 770 (0.00 %) blocks
Max Waves: 199898 -> 199871 (-0.01 %)
Wait states: 0 -> 0 (0.00 %)

Totals from affected shaders:
SGPRS: 52848 -> 52840 (-0.02 %)
VGPRS: 47472 -> 47884 (0.87 %)
Spilled SGPRs: 5079 -> 5105 (0.51 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 4760924 -> 4815596 (1.15 %) bytes
LDS: 26 -> 26 (0.00 %) blocks
Max Waves: 3084 -> 3057 (-0.88 %)
Wait states: 0 -> 0 (0.00 %)

On 3/28/19 3:08 PM, Jason Ekstrand wrote:
On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:


This helps a few compute shaders, mostly for F1 2017.

27670 shaders in 14347 tests
Totals:
SGPRS: 1231173 -> 1231173 (0.00 %)
VGPRS: 866056 -> 865928 (-0.01 %)
Spilled SGPRs: 24201 -> 24201 (0.00 %)
Code Size: 46137040 -> 46144868 (0.02 %) bytes
Max Waves: 232287 -> 232302 (0.01 %)

Totals from affected shaders:
SGPRS: 24624 -> 24624 (0.00 %)
VGPRS: 25960 -> 25832 (-0.49 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Code Size: 2922632 -> 2930460 (0.27 %) bytes
Max Waves: 1216 -> 1231 (1.23 %)

Suggested-by: mailto:ja...@jlekstrand.net>>
Signed-off-by: Samuel Pitoiset mailto:samuel.pitoi...@gmail.com>>
---
 src/amd/common/ac_nir_to_llvm.c | 8 
 src/amd/vulkan/radv_shader.c    | 5 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index b25cc6a0a84..c46d98e6dd9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct
ac_nir_context *ctx,
        case nir_intrinsic_vulkan_resource_reindex:
                result = visit_vulkan_resource_reindex(ctx, instr);
                break;
+       case nir_intrinsic_load_vulkan_descriptor: {
+               LLVMValueRef values[2] = {
+                       get_src(ctx, instr->src[0]),
+                       ctx->ac.i32_0,
+               };
+               result = ac_build_gather_values(&ctx->ac, values, 2);
+               break;
+       }
        case nir_intrinsic_store_ssbo:
                visit_store_ssbo(ctx, instr);
                break;
diff --git a/src/amd/vulkan/radv_shader.c
b/src/amd/vulkan/radv_shader.c
index 19a807df199..2751302e8b9 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                        }
                }
                const struct spirv_to_nir_options spirv_options = {
-                       .lower_ubo_ssbo_access_to_offsets = true,
                        .caps = {
.descriptor_array_dynamic_indexing = true,
                                .device_group = true,
@@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                NIR_PASS_V(nir, nir_lower_system_values);
                NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
                NIR_PASS_V(nir, nir_lower_frexp);
+
+               NIR_PASS_V(nir, nir_lower_explicit_io,
+                         nir_var_mem_ubo | nir_var_mem_ssbo,
+  nir_address_format_32bit_index_offset);


If you actually want to get SSBO access optimization, you need to call 
this *after* your first call to your main optimization loop.  
Otherwise, the change is basically just a no-op which just shuffles 
around the way address calculations are done a bit.


--Jason

        }

        /* Vulkan uses the separate-shader linking model */
-- 
2.21.0


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Samuel Pitoiset


On 3/28/19 3:08 PM, Jason Ekstrand wrote:
On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:


This helps few compute shaders, mostly for F12017.

27670 shaders in 14347 tests
Totals:
SGPRS: 1231173 -> 1231173 (0.00 %)
VGPRS: 866056 -> 865928 (-0.01 %)
Spilled SGPRs: 24201 -> 24201 (0.00 %)
Code Size: 46137040 -> 46144868 (0.02 %) bytes
Max Waves: 232287 -> 232302 (0.01 %)

Totals from affected shaders:
SGPRS: 24624 -> 24624 (0.00 %)
VGPRS: 25960 -> 25832 (-0.49 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Code Size: 2922632 -> 2930460 (0.27 %) bytes
Max Waves: 1216 -> 1231 (1.23 %)

Suggested-by: mailto:ja...@jlekstrand.net>>
Signed-off-by: Samuel Pitoiset mailto:samuel.pitoi...@gmail.com>>
---
 src/amd/common/ac_nir_to_llvm.c | 8 
 src/amd/vulkan/radv_shader.c    | 5 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index b25cc6a0a84..c46d98e6dd9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct
ac_nir_context *ctx,
        case nir_intrinsic_vulkan_resource_reindex:
                result = visit_vulkan_resource_reindex(ctx, instr);
                break;
+       case nir_intrinsic_load_vulkan_descriptor: {
+               LLVMValueRef values[2] = {
+                       get_src(ctx, instr->src[0]),
+                       ctx->ac.i32_0,
+               };
+               result = ac_build_gather_values(&ctx->ac, values, 2);
+               break;
+       }
        case nir_intrinsic_store_ssbo:
                visit_store_ssbo(ctx, instr);
                break;
diff --git a/src/amd/vulkan/radv_shader.c
b/src/amd/vulkan/radv_shader.c
index 19a807df199..2751302e8b9 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                        }
                }
                const struct spirv_to_nir_options spirv_options = {
-                       .lower_ubo_ssbo_access_to_offsets = true,
                        .caps = {
.descriptor_array_dynamic_indexing = true,
                                .device_group = true,
@@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                NIR_PASS_V(nir, nir_lower_system_values);
                NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
                NIR_PASS_V(nir, nir_lower_frexp);
+
+               NIR_PASS_V(nir, nir_lower_explicit_io,
+                         nir_var_mem_ubo | nir_var_mem_ssbo,
+  nir_address_format_32bit_index_offset);


If you actually want to get SSBO access optimization, you need to call 
this *after* your first call to your main optimization loop.  
Otherwise, the change is basically just a no-op which just shuffles 
around the way address calculations are done a bit.

Oh okay, let me try that.


--Jason

        }

        /* Vulkan uses the separate-shader linking model */
-- 
2.21.0


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] radv: write availability status vkGetQueryPoolResults() when the data is not available

2019-03-28 Thread Samuel Pitoiset


On 3/25/19 8:16 AM, Samuel Iglesias Gonsálvez wrote:

On Fri, 2019-03-22 at 17:21 +0100, Samuel Pitoiset wrote:

Does this fix anything known?


I am writing CTS tests for VK_EXT_host_query_reset extension and I
found this bug while testing them on RADV.


Does that rule also apply for CmdCopyQueryPoolResults()? If so, we
might
need to fix it (I haven't looked yet).


The rule is slightly different on CmdCopyQueryPoolResults():

"Similarly, if VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set and
VK_QUERY_RESULT_WAIT_BIT is not set, the availability is guaranteed to
reflect the most recent use of the query on the same queue, assuming
that the query is not being simultaneously used by other queues. As
with vkGetQueryPoolResults, implementations must guarantee that if they
return a non-zero availability value, then the numerical results are
valid."

So if VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set, we still need to set the
availability state.

I skimmed the implementation of this function in RADV; it seems it is
missing setting the availability value for all the queries except for
VK_QUERY_TYPE_TIMESTAMP.

Could you take care of this?

Yes, do you have tests for CmdCopyQueryPoolResults()?



With the comment on patch 1, series is:

Reviewed-by: Samuel Pitoiset 


OK, thanks! It seems I did a wrong squash of patches on patch 1. I will
fix it and push both patches to master.

Sam


On 3/22/19 1:03 PM, Samuel Iglesias Gonsálvez wrote:

If VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set and
VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are both
not set, we need to return VK_NOT_READY only and set the availability
status field for each query.

  From Vulkan spec:

"If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
both
not set then no result values are written to pData for queries that
are
in the unavailable state at the time of the call, and
vkGetQueryPoolResults returns VK_NOT_READY. However, availability
state
is still written to pData for those queries if
VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."

Signed-off-by: Samuel Iglesias Gonsálvez 
---
   src/amd/vulkan/radv_query.c | 15 +++
   1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/src/amd/vulkan/radv_query.c
b/src/amd/vulkan/radv_query.c
index 8578680f09d..63a2ab773a8 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1141,11 +1141,8 @@ VkResult radv_GetQueryPoolResults(
available = *(uint64_t *)src !=
TIMESTAMP_NOT_READY;
}
   
-			if (!available && !(flags &

VK_QUERY_RESULT_PARTIAL_BIT)) {
+   if (!available && !(flags &
VK_QUERY_RESULT_PARTIAL_BIT))
result = VK_NOT_READY;
-   break;
-
-   }
   
   			if (flags & VK_QUERY_RESULT_64_BIT) {

if (available || (flags &
VK_QUERY_RESULT_PARTIAL_BIT))
@@ -1178,11 +1175,8 @@ VkResult radv_GetQueryPoolResults(
}
}
   
-			if (!available && !(flags &

VK_QUERY_RESULT_PARTIAL_BIT)) {
+   if (!available && !(flags &
VK_QUERY_RESULT_PARTIAL_BIT))
result = VK_NOT_READY;
-   break;
-
-   }
   
   			if (flags & VK_QUERY_RESULT_64_BIT) {

if (available || (flags &
VK_QUERY_RESULT_PARTIAL_BIT))
@@ -1196,11 +1190,8 @@ VkResult radv_GetQueryPoolResults(
break;
}
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
-   if (!available && !(flags &
VK_QUERY_RESULT_PARTIAL_BIT)) {
+   if (!available && !(flags &
VK_QUERY_RESULT_PARTIAL_BIT))
result = VK_NOT_READY;
-   break;
-
-   }
   
   			const uint64_t *start = (uint64_t*)src;

const uint64_t *stop = (uint64_t*)(src +
pipelinestat_block_size);

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: do not always initialize HTILE in compressed state

2019-03-28 Thread Samuel Pitoiset
Especially when performing a transition from UNDEFINED->GENERAL,
the driver shouldn't initialize HTILE metadata in compressed
state because it doesn't decompress when the src layout is
GENERAL.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110259
Fixes: 3a2e93147f7 ("radv: always initialize HTILE when the src layout is 
UNDEFINED")
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index bed899d686e..7ee5a5ca7dc 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -4478,8 +4478,14 @@ static void radv_handle_depth_image_transition(struct 
radv_cmd_buffer *cmd_buffe
return;
 
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
-   /* TODO: merge with the clear if applicable */
-   radv_initialize_htile(cmd_buffer, image, range, 0);
+   uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 
0xf30f : 0xfffc000f;
+
+   if (radv_layout_is_htile_compressed(image, dst_layout,
+   dst_queue_mask)) {
+   clear_value = 0;
+   }
+
+   radv_initialize_htile(cmd_buffer, image, range, clear_value);
} else if (!radv_layout_is_htile_compressed(image, src_layout, 
src_queue_mask) &&
   radv_layout_is_htile_compressed(image, dst_layout, 
dst_queue_mask)) {
uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 
0xf30f : 0xfffc000f;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Jason Ekstrand
On Thu, Mar 28, 2019 at 9:29 AM Jason Ekstrand  wrote:

> I did some benchmarking of Assassin's Creed Odyssey today on ANV.
> Disabling the SSBO optimization possibilities in ANV that moving to derefs
> unlocks (I just moved the lowering super-early; moving back to index/offset
> would be insane) drops the perf of ACO by 20%.  You want SSBO derefs. :D
>

Or not...  Apparently the benchmark numbers aren't as reproducible as one
would like.  In any case, if you put the lowering after optimization, you
should see a more noticeable (and positive) vkpipeline-db result.

--Jason


> On Thu, Mar 28, 2019 at 9:08 AM Jason Ekstrand 
> wrote:
>
>> On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset <
>> samuel.pitoi...@gmail.com> wrote:
>>
>>> This helps few compute shaders, mostly for F12017.
>>>
>>> 27670 shaders in 14347 tests
>>> Totals:
>>> SGPRS: 1231173 -> 1231173 (0.00 %)
>>> VGPRS: 866056 -> 865928 (-0.01 %)
>>> Spilled SGPRs: 24201 -> 24201 (0.00 %)
>>> Code Size: 46137040 -> 46144868 (0.02 %) bytes
>>> Max Waves: 232287 -> 232302 (0.01 %)
>>>
>>> Totals from affected shaders:
>>> SGPRS: 24624 -> 24624 (0.00 %)
>>> VGPRS: 25960 -> 25832 (-0.49 %)
>>> Spilled SGPRs: 0 -> 0 (0.00 %)
>>> Code Size: 2922632 -> 2930460 (0.27 %) bytes
>>> Max Waves: 1216 -> 1231 (1.23 %)
>>>
>>> Suggested-by: 
>>> Signed-off-by: Samuel Pitoiset 
>>> ---
>>>  src/amd/common/ac_nir_to_llvm.c | 8 
>>>  src/amd/vulkan/radv_shader.c| 5 -
>>>  2 files changed, 12 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>>> b/src/amd/common/ac_nir_to_llvm.c
>>> index b25cc6a0a84..c46d98e6dd9 100644
>>> --- a/src/amd/common/ac_nir_to_llvm.c
>>> +++ b/src/amd/common/ac_nir_to_llvm.c
>>> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
>>> *ctx,
>>> case nir_intrinsic_vulkan_resource_reindex:
>>> result = visit_vulkan_resource_reindex(ctx, instr);
>>> break;
>>> +   case nir_intrinsic_load_vulkan_descriptor: {
>>> +   LLVMValueRef values[2] = {
>>> +   get_src(ctx, instr->src[0]),
>>> +   ctx->ac.i32_0,
>>> +   };
>>> +   result = ac_build_gather_values(&ctx->ac, values, 2);
>>> +   break;
>>> +   }
>>> case nir_intrinsic_store_ssbo:
>>> visit_store_ssbo(ctx, instr);
>>> break;
>>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>>> index 19a807df199..2751302e8b9 100644
>>> --- a/src/amd/vulkan/radv_shader.c
>>> +++ b/src/amd/vulkan/radv_shader.c
>>> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device
>>> *device,
>>> }
>>> }
>>> const struct spirv_to_nir_options spirv_options = {
>>> -   .lower_ubo_ssbo_access_to_offsets = true,
>>> .caps = {
>>> .descriptor_array_dynamic_indexing =
>>> true,
>>> .device_group = true,
>>> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
>>> *device,
>>> NIR_PASS_V(nir, nir_lower_system_values);
>>> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
>>> NIR_PASS_V(nir, nir_lower_frexp);
>>> +
>>> +   NIR_PASS_V(nir, nir_lower_explicit_io,
>>> + nir_var_mem_ubo | nir_var_mem_ssbo,
>>> + nir_address_format_32bit_index_offset);
>>>
>>
>> If you actually want to get SSBO access optimization, you need to call
>> this *after* your first call to your main optimization loop.  Otherwise,
>> the change is basically just a no-op which just shuffles around the way
>> address calculations are done a bit.
>>
>> --Jason
>>
>>
>>> }
>>>
>>> /* Vulkan uses the separate-shader linking model */
>>> --
>>> 2.21.0
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Jason Ekstrand
I did some benchmarking of Assassin's Creed Odyssey today on ANV.
Disabling the SSBO optimization possibilities in ANV that moving to derefs
unlocks (I just moved the lowering super-early; moving back to index/offset
would be insane) drops the perf of ACO by 20%.  You want SSBO derefs. :D

On Thu, Mar 28, 2019 at 9:08 AM Jason Ekstrand  wrote:

> On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset 
> wrote:
>
>> This helps few compute shaders, mostly for F12017.
>>
>> 27670 shaders in 14347 tests
>> Totals:
>> SGPRS: 1231173 -> 1231173 (0.00 %)
>> VGPRS: 866056 -> 865928 (-0.01 %)
>> Spilled SGPRs: 24201 -> 24201 (0.00 %)
>> Code Size: 46137040 -> 46144868 (0.02 %) bytes
>> Max Waves: 232287 -> 232302 (0.01 %)
>>
>> Totals from affected shaders:
>> SGPRS: 24624 -> 24624 (0.00 %)
>> VGPRS: 25960 -> 25832 (-0.49 %)
>> Spilled SGPRs: 0 -> 0 (0.00 %)
>> Code Size: 2922632 -> 2930460 (0.27 %) bytes
>> Max Waves: 1216 -> 1231 (1.23 %)
>>
>> Suggested-by: 
>> Signed-off-by: Samuel Pitoiset 
>> ---
>>  src/amd/common/ac_nir_to_llvm.c | 8 
>>  src/amd/vulkan/radv_shader.c| 5 -
>>  2 files changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>> b/src/amd/common/ac_nir_to_llvm.c
>> index b25cc6a0a84..c46d98e6dd9 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
>> *ctx,
>> case nir_intrinsic_vulkan_resource_reindex:
>> result = visit_vulkan_resource_reindex(ctx, instr);
>> break;
>> +   case nir_intrinsic_load_vulkan_descriptor: {
>> +   LLVMValueRef values[2] = {
>> +   get_src(ctx, instr->src[0]),
>> +   ctx->ac.i32_0,
>> +   };
>> +   result = ac_build_gather_values(&ctx->ac, values, 2);
>> +   break;
>> +   }
>> case nir_intrinsic_store_ssbo:
>> visit_store_ssbo(ctx, instr);
>> break;
>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>> index 19a807df199..2751302e8b9 100644
>> --- a/src/amd/vulkan/radv_shader.c
>> +++ b/src/amd/vulkan/radv_shader.c
>> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
>> }
>> }
>> const struct spirv_to_nir_options spirv_options = {
>> -   .lower_ubo_ssbo_access_to_offsets = true,
>> .caps = {
>> .descriptor_array_dynamic_indexing = true,
>> .device_group = true,
>> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
>> *device,
>> NIR_PASS_V(nir, nir_lower_system_values);
>> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
>> NIR_PASS_V(nir, nir_lower_frexp);
>> +
>> +   NIR_PASS_V(nir, nir_lower_explicit_io,
>> + nir_var_mem_ubo | nir_var_mem_ssbo,
>> + nir_address_format_32bit_index_offset);
>>
>
> If you actually want to get SSBO access optimization, you need to call
> this *after* your first call to your main optimization loop.  Otherwise,
> the change is basically just a no-op which just shuffles around the way
> address calculations are done a bit.
>
> --Jason
>
>
>> }
>>
>> /* Vulkan uses the separate-shader linking model */
>> --
>> 2.21.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Jason Ekstrand
On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset 
wrote:

> This helps few compute shaders, mostly for F12017.
>
> 27670 shaders in 14347 tests
> Totals:
> SGPRS: 1231173 -> 1231173 (0.00 %)
> VGPRS: 866056 -> 865928 (-0.01 %)
> Spilled SGPRs: 24201 -> 24201 (0.00 %)
> Code Size: 46137040 -> 46144868 (0.02 %) bytes
> Max Waves: 232287 -> 232302 (0.01 %)
>
> Totals from affected shaders:
> SGPRS: 24624 -> 24624 (0.00 %)
> VGPRS: 25960 -> 25832 (-0.49 %)
> Spilled SGPRs: 0 -> 0 (0.00 %)
> Code Size: 2922632 -> 2930460 (0.27 %) bytes
> Max Waves: 1216 -> 1231 (1.23 %)
>
> Suggested-by: 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 8 
>  src/amd/vulkan/radv_shader.c| 5 -
>  2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index b25cc6a0a84..c46d98e6dd9 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
> *ctx,
> case nir_intrinsic_vulkan_resource_reindex:
> result = visit_vulkan_resource_reindex(ctx, instr);
> break;
> +   case nir_intrinsic_load_vulkan_descriptor: {
> +   LLVMValueRef values[2] = {
> +   get_src(ctx, instr->src[0]),
> +   ctx->ac.i32_0,
> +   };
> +   result = ac_build_gather_values(&ctx->ac, values, 2);
> +   break;
> +   }
> case nir_intrinsic_store_ssbo:
> visit_store_ssbo(ctx, instr);
> break;
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 19a807df199..2751302e8b9 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
> }
> }
> const struct spirv_to_nir_options spirv_options = {
> -   .lower_ubo_ssbo_access_to_offsets = true,
> .caps = {
> .descriptor_array_dynamic_indexing = true,
> .device_group = true,
> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device *device,
> NIR_PASS_V(nir, nir_lower_system_values);
> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
> NIR_PASS_V(nir, nir_lower_frexp);
> +
> +   NIR_PASS_V(nir, nir_lower_explicit_io,
> + nir_var_mem_ubo | nir_var_mem_ssbo,
> + nir_address_format_32bit_index_offset);
>

If you actually want to get SSBO access optimization, you need to call this
*after* your first call to your main optimization loop.  Otherwise, the
change is basically just a no-op which just shuffles around the way address
calculations are done a bit.

--Jason


> }
>
> /* Vulkan uses the separate-shader linking model */
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeon/vcn/vp9: search the render target from the whole list

2019-03-28 Thread Liu, Leo
The number of render targets could be more than the maximum number of
references, so we search the full list of render pictures for the
current render target index.

https://bugs.freedesktop.org/show_bug.cgi?id=109648

Signed-off-by: Leo Liu 
Cc: 
---
 src/gallium/drivers/radeon/radeon_vcn_dec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c 
b/src/gallium/drivers/radeon/radeon_vcn_dec.c
index d165c55f835..688cef90103 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c
@@ -491,7 +491,7 @@ static rvcn_dec_message_vp9_t get_vp9_msg(struct 
radeon_decoder *dec,
 
assert(dec->base.max_references + 1 <= 16);
 
-   for (i = 0 ; i < dec->base.max_references + 1 ; ++i) {
+   for (i = 0 ; i < 16 ; ++i) {
if (dec->render_pic_list[i] && dec->render_pic_list[i] == 
target) {
result.curr_pic_idx =

(uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 107991] RX580 ~ ring gfx timeout ~ particular shaders created by a dolphin-emu game can crash AMDGPU, with both RadeonSI and RADV ~ attached apitrace for RadeonSI

2019-03-28 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107991

glencoesm...@hotmail.com changed:

   What|Removed |Added

 CC||glencoesm...@hotmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: skip updating clear/color metadata for conditional rendering

2019-03-28 Thread Bas Nieuwenhuizen
r-b, though technically you may want something older for the fixes tag.

On Thu, Mar 28, 2019 at 12:20 PM Samuel Pitoiset
 wrote:
>
> I don't think we should update metadata when conditional rendering
> is enabled. For some reason, some CTS tests break only on SI.
>
> This fixes the following CTS on SI:
> dEQP-VK.conditional_rendering.draw_clear.clear.depth.*
>
> Fixes: a777c3d7cb0 ("radv: Use correct image view comparison for fast 
> clears.")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index fdf3024147d..bed899d686e 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1275,7 +1275,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer 
> *cmd_buffer,
> if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
> ++reg_count;
>
> -   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0));
> +   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 
> cmd_buffer->state.predicating));
> radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
> S_370_WR_CONFIRM(1) |
> S_370_ENGINE_SEL(V_370_PFP));
> @@ -1299,7 +1299,7 @@ radv_set_tc_compat_zrange_metadata(struct 
> radv_cmd_buffer *cmd_buffer,
> uint64_t va = radv_buffer_get_va(image->bo);
> va += image->offset + image->tc_compat_zrange_offset;
>
> -   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
> +   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 
> cmd_buffer->state.predicating));
> radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
> S_370_WR_CONFIRM(1) |
> S_370_ENGINE_SEL(V_370_PFP));
> @@ -1493,7 +1493,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer 
> *cmd_buffer,
>
> assert(radv_image_has_cmask(image) || radv_image_has_dcc(image));
>
> -   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
> +   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 
> cmd_buffer->state.predicating));
> radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
> S_370_WR_CONFIRM(1) |
> S_370_ENGINE_SEL(V_370_PFP));
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: skip updating clear/color metadata for conditional rendering

2019-03-28 Thread Samuel Pitoiset
I don't think we should update metadata when conditional rendering
is enabled. For some reason, some CTS tests break only on SI.

This fixes the following CTS on SI:
dEQP-VK.conditional_rendering.draw_clear.clear.depth.*

Fixes: a777c3d7cb0 ("radv: Use correct image view comparison for fast clears.")
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index fdf3024147d..bed899d686e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1275,7 +1275,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer 
*cmd_buffer,
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
++reg_count;
 
-   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0));
+   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 
cmd_buffer->state.predicating));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
@@ -1299,7 +1299,7 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer 
*cmd_buffer,
uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->tc_compat_zrange_offset;
 
-   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 
cmd_buffer->state.predicating));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
@@ -1493,7 +1493,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer 
*cmd_buffer,
 
assert(radv_image_has_cmask(image) || radv_image_has_dcc(image));
 
-   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
+   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 
cmd_buffer->state.predicating));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] radv: enable VK_AMD_gpu_shader_int16

2019-03-28 Thread Bas Nieuwenhuizen
r-b

For series

On Fri, Mar 22, 2019, 2:49 PM Samuel Pitoiset 
wrote:

> This extension adds 16-bit support for Frexp/FrexpStruct.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_extensions.py | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/amd/vulkan/radv_extensions.py
> b/src/amd/vulkan/radv_extensions.py
> index 23106765c2a..e97f320e8a1 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -123,6 +123,7 @@ EXTENSIONS = [
>  Extension('VK_AMD_draw_indirect_count',   1, True),
>  Extension('VK_AMD_gcn_shader',1, True),
>  Extension('VK_AMD_gpu_shader_half_float', 1,
> 'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'),
> +Extension('VK_AMD_gpu_shader_int16',  1,
> 'device->rad_info.chip_class >= VI'),
>  Extension('VK_AMD_rasterization_order',   1,
> 'device->has_out_of_order_rast'),
>  Extension('VK_AMD_shader_core_properties',1, True),
>  Extension('VK_AMD_shader_info',   1, True),
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Samuel Pitoiset


On 3/28/19 10:18 AM, Bas Nieuwenhuizen wrote:

R-b

Though not sure it really helps given code size increase?
I haven't benchmarked F12017 yet. The code size increase seems to be 
LLVM-related, but that shouldn't matter much.


On Wed, Mar 27, 2019, 10:13 AM Samuel Pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:


This helps few compute shaders, mostly for F12017.

27670 shaders in 14347 tests
Totals:
SGPRS: 1231173 -> 1231173 (0.00 %)
VGPRS: 866056 -> 865928 (-0.01 %)
Spilled SGPRs: 24201 -> 24201 (0.00 %)
Code Size: 46137040 -> 46144868 (0.02 %) bytes
Max Waves: 232287 -> 232302 (0.01 %)

Totals from affected shaders:
SGPRS: 24624 -> 24624 (0.00 %)
VGPRS: 25960 -> 25832 (-0.49 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Code Size: 2922632 -> 2930460 (0.27 %) bytes
Max Waves: 1216 -> 1231 (1.23 %)

Suggested-by: mailto:ja...@jlekstrand.net>>
Signed-off-by: Samuel Pitoiset mailto:samuel.pitoi...@gmail.com>>
---
 src/amd/common/ac_nir_to_llvm.c | 8 
 src/amd/vulkan/radv_shader.c    | 5 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index b25cc6a0a84..c46d98e6dd9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct
ac_nir_context *ctx,
        case nir_intrinsic_vulkan_resource_reindex:
                result = visit_vulkan_resource_reindex(ctx, instr);
                break;
+       case nir_intrinsic_load_vulkan_descriptor: {
+               LLVMValueRef values[2] = {
+                       get_src(ctx, instr->src[0]),
+                       ctx->ac.i32_0,
+               };
+               result = ac_build_gather_values(&ctx->ac, values, 2);
+               break;
+       }
        case nir_intrinsic_store_ssbo:
                visit_store_ssbo(ctx, instr);
                break;
diff --git a/src/amd/vulkan/radv_shader.c
b/src/amd/vulkan/radv_shader.c
index 19a807df199..2751302e8b9 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                        }
                }
                const struct spirv_to_nir_options spirv_options = {
-                       .lower_ubo_ssbo_access_to_offsets = true,
                        .caps = {
.descriptor_array_dynamic_indexing = true,
                                .device_group = true,
@@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                NIR_PASS_V(nir, nir_lower_system_values);
                NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
                NIR_PASS_V(nir, nir_lower_frexp);
+
+               NIR_PASS_V(nir, nir_lower_explicit_io,
+                         nir_var_mem_ubo | nir_var_mem_ssbo,
+  nir_address_format_32bit_index_offset);
        }

        /* Vulkan uses the separate-shader linking model */
-- 
2.21.0


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] ac: allow to use vec3 for typed/untyped buffer stores/loads with LLVM 9+

2019-03-28 Thread Bas Nieuwenhuizen
r-b

On Tue, Mar 26, 2019, 10:13 AM Samuel Pitoiset 
wrote:

> 27670 shaders in 14347 tests
> Totals:
> SGPRS: 1231173 -> 1236757 (0.45 %)
> VGPRS: 866056 -> 867488 (0.17 %)
> Spilled SGPRs: 24201 -> 24169 (-0.13 %)
> Code Size: 46134836 -> 46115944 (-0.04 %) bytes
> Max Waves: 232287 -> 232070 (-0.09 %)
>
> Totals from affected shaders:
> SGPRS: 247624 -> 253208 (2.26 %)
> VGPRS: 214952 -> 216384 (0.67 %)
> Spilled SGPRs: 63 -> 31 (-50.79 %)
> Code Size: 7633772 -> 7614880 (-0.25 %) bytes
> Max Waves: 62065 -> 61848 (-0.35 %)
>
> This change requires LLVM r356755.
>
> v2: - fix llvm 8
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 33 +++--
>  src/amd/common/ac_llvm_build.h |  1 +
>  2 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c
> b/src/amd/common/ac_llvm_build.c
> index 1123dce2cc8..a816327ce95 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -83,6 +83,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
> ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
> ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
> ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
> +   ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
> ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
> ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
> ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
> @@ -1150,9 +1151,12 @@ ac_build_llvm8_buffer_store_common(struct
> ac_llvm_context *ctx,
> args[idx++] = voffset ? voffset : ctx->i32_0;
> args[idx++] = soffset ? soffset : ctx->i32_0;
> args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -   unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +   unsigned func = CLAMP(num_channels, 1, 4) - 1;
>
> -   const char *type_names[] = {"f32", "v2f32", "v4f32"};
> +   if (HAVE_LLVM == 0x800 && func == 2)
> +   func = 3; /* Only LLVM 9+ supports vec3 */
> +
> +   const char *type_names[] = {"f32", "v2f32", "v3f32", "v4f32"};
> const char *indexing_kind = structurized ? "struct" : "raw";
> char name[256];
>
> @@ -1334,10 +1338,13 @@ ac_build_llvm8_buffer_load_common(struct
> ac_llvm_context *ctx,
> args[idx++] = voffset ? voffset : ctx->i32_0;
> args[idx++] = soffset ? soffset : ctx->i32_0;
> args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -   unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +   unsigned func = CLAMP(num_channels, 1, 4) - 1;
>
> -   LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32};
> -   const char *type_names[] = {"f32", "v2f32", "v4f32"};
> +   if (HAVE_LLVM == 0x800 && func == 2)
> +   func = 3; /* Only LLVM 9+ supports vec3 */
> +
> +   LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v3f32,
> ctx->v4f32};
> +   const char *type_names[] = {"f32", "v2f32", "v3f32", "v4f32"};
> const char *indexing_kind = structurized ? "struct" : "raw";
> char name[256];
>
> @@ -1490,10 +1497,13 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context
> *ctx,
> args[idx++] = soffset ? soffset : ctx->i32_0;
> args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
> args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -   unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +   unsigned func = CLAMP(num_channels, 1, 4) - 1;
>
> -   LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
> -   const char *type_names[] = {"i32", "v2i32", "v4i32"};
> +   if (HAVE_LLVM == 0x800 && func == 2)
> +   func = 3; /* Only LLVM 9+ supports vec3 */
> +
> +   LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v3i32,
> ctx->v4i32};
> +   const char *type_names[] = {"i32", "v2i32", "v3i32", "v4i32"};
> const char *indexing_kind = structurized ? "struct" : "raw";
> char name[256];
>
> @@ -1651,9 +1661,12 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context
> *ctx,
> args[idx++] = soffset ? soffset : ctx->i32_0;
> args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
> args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -   unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +   unsigned func = CLAMP(num_channels, 1, 4) - 1;
>
> -   const char *type_names[] = {"i32", "v2i32", "v4i32"};
> +   if (HAVE_LLVM == 0x800 && func == 2)
> +   func = 3; /* Only LLVM 9+ supports vec3 */
> +
> +   const char *type_names[] = {"i32", "v2i32", "v3i32", "v4i32"};
> const char *indexing_kind = structurized ? "struct" : "raw";
> char name[256];
>
> diff --git a/src/amd/common/ac_llvm_build.h
> b/src/amd/common/ac_llvm_build.h
> index 9151c743bed..d2f8cd5e08b 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -71,6 +71,7 @@ struct 

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Bas Nieuwenhuizen
R-b

Though not sure it really helps given code size increase?

On Wed, Mar 27, 2019, 10:13 AM Samuel Pitoiset 
wrote:

> This helps a few compute shaders, mostly for F12017.
>
> 27670 shaders in 14347 tests
> Totals:
> SGPRS: 1231173 -> 1231173 (0.00 %)
> VGPRS: 866056 -> 865928 (-0.01 %)
> Spilled SGPRs: 24201 -> 24201 (0.00 %)
> Code Size: 46137040 -> 46144868 (0.02 %) bytes
> Max Waves: 232287 -> 232302 (0.01 %)
>
> Totals from affected shaders:
> SGPRS: 24624 -> 24624 (0.00 %)
> VGPRS: 25960 -> 25832 (-0.49 %)
> Spilled SGPRs: 0 -> 0 (0.00 %)
> Code Size: 2922632 -> 2930460 (0.27 %) bytes
> Max Waves: 1216 -> 1231 (1.23 %)
>
> Suggested-by: Jason Ekstrand
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 8 
>  src/amd/vulkan/radv_shader.c| 5 -
>  2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index b25cc6a0a84..c46d98e6dd9 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
> *ctx,
> case nir_intrinsic_vulkan_resource_reindex:
> result = visit_vulkan_resource_reindex(ctx, instr);
> break;
> +   case nir_intrinsic_load_vulkan_descriptor: {
> +   LLVMValueRef values[2] = {
> +   get_src(ctx, instr->src[0]),
> +   ctx->ac.i32_0,
> +   };
> +   result = ac_build_gather_values(&ctx->ac, values, 2);
> +   break;
> +   }
> case nir_intrinsic_store_ssbo:
> visit_store_ssbo(ctx, instr);
> break;
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 19a807df199..2751302e8b9 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
> }
> }
> const struct spirv_to_nir_options spirv_options = {
> -   .lower_ubo_ssbo_access_to_offsets = true,
> .caps = {
> .descriptor_array_dynamic_indexing = true,
> .device_group = true,
> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device *device,
> NIR_PASS_V(nir, nir_lower_system_values);
> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
> NIR_PASS_V(nir, nir_lower_frexp);
> +
> +   NIR_PASS_V(nir, nir_lower_explicit_io,
> + nir_var_mem_ubo | nir_var_mem_ssbo,
> + nir_address_format_32bit_index_offset);
> }
>
> /* Vulkan uses the separate-shader linking model */
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110268] enable storageBuffer16BitAccess feature in radv for SI and CIK

2019-03-28 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110268

--- Comment #2 from nihui  ---
(In reply to Samuel Pitoiset from comment #1)
> Where can I find this benchmark?

Hi

The benchmark is the benchncnn program in repo
https://github.com/Tencent/ncnn

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110268] enable storageBuffer16BitAccess feature in radv for SI and CIK

2019-03-28 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110268

--- Comment #1 from Samuel Pitoiset  ---
Where can I find this benchmark?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110266] gpu error

2019-03-28 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110266

Christian König  changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         QA Contact|mesa-dev@lists.freedesktop. |
                   |org                         |
            Product|Mesa                        |Spam
           Assignee|mesa-dev@lists.freedesktop. |dan...@fooishbar.org
                   |org                         |
          Component|Drivers/Vulkan/radeon       |Two

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev