[Mesa-dev] [PATCH] mesa: Factor out _mesa_disable_vertex_array_attrib.

2018-02-05 Thread Mathias . Froehlich
From: Mathias Fröhlich 

Hi,

Simple code deduplication and factoring out a function that
will be useful soon.

please review

thanks!!

Mathias



And use it in the enable code path.
Move _mesa_update_attribute_map_mode into its only remaining file.

Signed-off-by: Mathias Fröhlich 
---
 src/mesa/main/arrayobj.h | 26 
 src/mesa/main/enable.c   | 64 ++--
 src/mesa/main/varray.c   | 58 ---
 src/mesa/main/varray.h   |  7 ++
 4 files changed, 75 insertions(+), 80 deletions(-)

diff --git a/src/mesa/main/arrayobj.h b/src/mesa/main/arrayobj.h
index 411ed65c50..5de74505bb 100644
--- a/src/mesa/main/arrayobj.h
+++ b/src/mesa/main/arrayobj.h
@@ -99,32 +99,6 @@ extern const GLubyte
 _mesa_vao_attribute_map[ATTRIBUTE_MAP_MODE_MAX][VERT_ATTRIB_MAX];
 
 
-/**
- * Depending on the position and generic0 attributes enable flags select
- * the one that is used for both attributes.
- * The generic0 attribute takes precedence.
- */
-static inline void
-_mesa_update_attribute_map_mode(const struct gl_context *ctx,
-struct gl_vertex_array_object *vao)
-{
-   /*
-* There is no need to change the mapping away from the
-* identity mapping if we are not in compat mode.
-*/
-   if (ctx->API != API_OPENGL_COMPAT)
-  return;
-   /* The generic0 attribute superseeds the position attribute */
-   const GLbitfield enabled = vao->_Enabled;
-   if (enabled & VERT_BIT_GENERIC0)
-  vao->_AttributeMapMode = ATTRIBUTE_MAP_MODE_GENERIC0;
-   else if (enabled & VERT_BIT_POS)
-  vao->_AttributeMapMode = ATTRIBUTE_MAP_MODE_POSITION;
-   else
-  vao->_AttributeMapMode = ATTRIBUTE_MAP_MODE_IDENTITY;
-}
-
-
 /**
  * Apply the position/generic0 aliasing map to a bitfield from the vao.
  * Use for example to convert gl_vertex_array_object::_Enabled
diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index bc22410bda..967d23080c 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -40,6 +40,7 @@
 #include "mtypes.h"
 #include "enums.h"
 #include "texstate.h"
+#include "varray.h"
 
 
 
@@ -58,55 +59,56 @@ update_derived_primitive_restart_state(struct gl_context 
*ctx)
   || ctx->Array.PrimitiveRestartFixedIndex;
 }
 
+
+/**
+ * Helper to enable/disable VAO client-side state.
+ */
+static void
+vao_state(struct gl_context *ctx, gl_vert_attrib attr, GLboolean state)
+{
+   if (state)
+  _mesa_enable_vertex_array_attrib(ctx, ctx->Array.VAO, attr);
+   else
+  _mesa_disable_vertex_array_attrib(ctx, ctx->Array.VAO, attr);
+}
+
+
 /**
  * Helper to enable/disable client-side state.
  */
 static void
 client_state(struct gl_context *ctx, GLenum cap, GLboolean state)
 {
-   struct gl_vertex_array_object *vao = ctx->Array.VAO;
-   GLbitfield vert_attrib_bit;
-   GLboolean *enable_var;
-
switch (cap) {
   case GL_VERTEX_ARRAY:
- enable_var = >VertexAttrib[VERT_ATTRIB_POS].Enabled;
- vert_attrib_bit = VERT_BIT_POS;
+ vao_state(ctx, VERT_ATTRIB_POS, state);
  break;
   case GL_NORMAL_ARRAY:
- enable_var = >VertexAttrib[VERT_ATTRIB_NORMAL].Enabled;
- vert_attrib_bit = VERT_BIT_NORMAL;
+ vao_state(ctx, VERT_ATTRIB_NORMAL, state);
  break;
   case GL_COLOR_ARRAY:
- enable_var = >VertexAttrib[VERT_ATTRIB_COLOR0].Enabled;
- vert_attrib_bit = VERT_BIT_COLOR0;
+ vao_state(ctx, VERT_ATTRIB_COLOR0, state);
  break;
   case GL_INDEX_ARRAY:
- enable_var = >VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled;
- vert_attrib_bit = VERT_BIT_COLOR_INDEX;
+ vao_state(ctx, VERT_ATTRIB_COLOR_INDEX, state);
  break;
   case GL_TEXTURE_COORD_ARRAY:
- enable_var = 
>VertexAttrib[VERT_ATTRIB_TEX(ctx->Array.ActiveTexture)].Enabled;
- vert_attrib_bit = VERT_BIT_TEX(ctx->Array.ActiveTexture);
+ vao_state(ctx, VERT_ATTRIB_TEX(ctx->Array.ActiveTexture), state);
  break;
   case GL_EDGE_FLAG_ARRAY:
- enable_var = >VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled;
- vert_attrib_bit = VERT_BIT_EDGEFLAG;
+ vao_state(ctx, VERT_ATTRIB_EDGEFLAG, state);
  break;
   case GL_FOG_COORDINATE_ARRAY_EXT:
- enable_var = >VertexAttrib[VERT_ATTRIB_FOG].Enabled;
- vert_attrib_bit = VERT_BIT_FOG;
+ vao_state(ctx, VERT_ATTRIB_FOG, state);
  break;
   case GL_SECONDARY_COLOR_ARRAY_EXT:
- enable_var = >VertexAttrib[VERT_ATTRIB_COLOR1].Enabled;
- vert_attrib_bit = VERT_BIT_COLOR1;
+ vao_state(ctx, VERT_ATTRIB_COLOR1, state);
  break;
 
   case GL_POINT_SIZE_ARRAY_OES:
- enable_var = >VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled;
- vert_attrib_bit = VERT_BIT_POINT_SIZE;
  FLUSH_VERTICES(ctx, _NEW_PROGRAM);
  ctx->VertexProgram.PointSizeEnabled = 

[Mesa-dev] [PATCH] mesa: Use atomics for buffer objects reference counts.

2018-02-05 Thread Mathias . Froehlich
From: Mathias Fröhlich 

Hi all,

please review

best

Mathias


The mutex is currently used for reference counting and updating
the minmax index cache.
The change uses atomics directly for reference counting and
the mutex for the minmax cache.
This is safe since the reference count is not modified anywhere except
in _mesa_reference_buffer_object, where atomics are now used.
While using the minmax cache, the calling code holds a reference
to the buffer object. Thus unreferencing or even referencing the
buffer object does not need to be serialized with accessing
the minmax cache.
The change reduces the time _mesa_reference_buffer_object_ takes
by about a factor of two when looking at perf results for some
of my favorite use cases.

Signed-off-by: Mathias Fröhlich 
---
 src/mesa/main/bufferobj.c   | 22 ++
 src/mesa/main/mtypes.h  |  2 +-
 src/mesa/vbo/vbo_minmax_index.c |  8 
 3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index c1dfdfba82..67f9cd0a90 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -46,6 +46,7 @@
 #include "texstore.h"
 #include "transformfeedback.h"
 #include "varray.h"
+#include "util/u_atomic.h"
 
 
 /* Debug flags */
@@ -471,7 +472,7 @@ _mesa_delete_buffer_object(struct gl_context *ctx,
bufObj->RefCount = -1000;
bufObj->Name = ~0;
 
-   simple_mtx_destroy(>Mutex);
+   simple_mtx_destroy(>MinMaxCacheMutex);
free(bufObj->Label);
free(bufObj);
 }
@@ -490,16 +491,9 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
 {
if (*ptr) {
   /* Unreference the old buffer */
-  GLboolean deleteFlag = GL_FALSE;
   struct gl_buffer_object *oldObj = *ptr;
 
-  simple_mtx_lock(>Mutex);
-  assert(oldObj->RefCount > 0);
-  oldObj->RefCount--;
-  deleteFlag = (oldObj->RefCount == 0);
-  simple_mtx_unlock(>Mutex);
-
-  if (deleteFlag) {
+  if (p_atomic_dec_zero(>RefCount)) {
 assert(ctx->Driver.DeleteBuffer);
  ctx->Driver.DeleteBuffer(ctx, oldObj);
   }
@@ -510,12 +504,8 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
 
if (bufObj) {
   /* reference new buffer */
-  simple_mtx_lock(>Mutex);
-  assert(bufObj->RefCount > 0);
-
-  bufObj->RefCount++;
+  p_atomic_inc(>RefCount);
   *ptr = bufObj;
-  simple_mtx_unlock(>Mutex);
}
 }
 
@@ -547,11 +537,11 @@ _mesa_initialize_buffer_object(struct gl_context *ctx,
GLuint name)
 {
memset(obj, 0, sizeof(struct gl_buffer_object));
-   simple_mtx_init(>Mutex, mtx_plain);
obj->RefCount = 1;
obj->Name = name;
obj->Usage = GL_STATIC_DRAW_ARB;
 
+   simple_mtx_init(>MinMaxCacheMutex, mtx_plain);
if (get_no_minmax_cache())
   obj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
 }
@@ -870,7 +860,7 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
GLuint i;
 
memset(, 0, sizeof(DummyBufferObject));
-   simple_mtx_init(, mtx_plain);
+   simple_mtx_init(, mtx_plain);
DummyBufferObject.RefCount = 1000*1000*1000; /* never delete */
 
_mesa_reference_buffer_object(ctx, >Array.ArrayBufferObj,
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 3a67d43420..b6d606386e 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1443,7 +1443,6 @@ typedef enum
  */
 struct gl_buffer_object
 {
-   simple_mtx_t Mutex;
GLint RefCount;
GLuint Name;
GLchar *Label;   /**< GL_KHR_debug */
@@ -1464,6 +1463,7 @@ struct gl_buffer_object
struct gl_buffer_mapping Mappings[MAP_COUNT];
 
/** Memoization of min/max index computations for static index buffers */
+   simple_mtx_t MinMaxCacheMutex;
struct hash_table *MinMaxCache;
unsigned MinMaxCacheHitIndices;
unsigned MinMaxCacheMissIndices;
diff --git a/src/mesa/vbo/vbo_minmax_index.c b/src/mesa/vbo/vbo_minmax_index.c
index c9d2020167..d1298dcdc3 100644
--- a/src/mesa/vbo/vbo_minmax_index.c
+++ b/src/mesa/vbo/vbo_minmax_index.c
@@ -115,7 +115,7 @@ vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
if (!vbo_use_minmax_cache(bufferObj))
   return GL_FALSE;
 
-   simple_mtx_lock(>Mutex);
+   simple_mtx_lock(>MinMaxCacheMutex);
 
if (bufferObj->MinMaxCacheDirty) {
   /* Disable the cache permanently for this BO if the number of hits
@@ -166,7 +166,7 @@ out_invalidate:
}
 
 out_disable:
-   simple_mtx_unlock(>Mutex);
+   simple_mtx_unlock(>MinMaxCacheMutex);
return found;
 }
 
@@ -184,7 +184,7 @@ vbo_minmax_cache_store(struct gl_context *ctx,
if (!vbo_use_minmax_cache(bufferObj))
   return;
 
-   simple_mtx_lock(>Mutex);
+   simple_mtx_lock(>MinMaxCacheMutex);
 
if (!bufferObj->MinMaxCache) {
   bufferObj->MinMaxCache =
@@ -223,7 +223,7 @@ vbo_minmax_cache_store(struct gl_context *ctx,
   free(entry);
 
 out:
-   simple_mtx_unlock(>Mutex);
+   

[Mesa-dev] [PATCH] vbo: Move vbo_rebase into its only caller module tnl.

2018-02-05 Thread Mathias . Froehlich
From: Mathias Fröhlich 

Hi all,

This change moves vbo_rebase_prims into the tnl module.
The tnl module is the only user of this function.

please review

best

Mathias

Signed-off-by: Mathias Fröhlich 
---
 src/mesa/Makefile.sources |  3 ++-
 src/mesa/meson.build  |  2 +-
 src/mesa/tnl/t_draw.c |  7 ++---
 src/mesa/{vbo/vbo_rebase.c => tnl/t_rebase.c} | 18 ++---
 src/mesa/tnl/t_rebase.h   | 39 +++
 src/mesa/vbo/vbo.h| 11 
 6 files changed, 55 insertions(+), 25 deletions(-)
 rename src/mesa/{vbo/vbo_rebase.c => tnl/t_rebase.c} (94%)
 create mode 100644 src/mesa/tnl/t_rebase.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 880f379eb1..0a9aad52d0 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -370,6 +370,8 @@ TNL_FILES = \
tnl/tnl.h \
tnl/t_pipeline.c \
tnl/t_pipeline.h \
+   tnl/t_rebase.c \
+   tnl/t_rebase.h \
tnl/t_vb_cliptmp.h \
tnl/t_vb_fog.c \
tnl/t_vb_light.c \
@@ -405,7 +407,6 @@ VBO_FILES = \
vbo/vbo_noop.h \
vbo/vbo_primitive_restart.c \
vbo/vbo_private.h \
-   vbo/vbo_rebase.c \
vbo/vbo_save_api.c \
vbo/vbo_save.c \
vbo/vbo_save_draw.c \
diff --git a/src/mesa/meson.build b/src/mesa/meson.build
index a74c39d29e..aa27d59264 100644
--- a/src/mesa/meson.build
+++ b/src/mesa/meson.build
@@ -338,7 +338,6 @@ files_libmesa_common = files(
   'vbo/vbo_noop.c',
   'vbo/vbo_noop.h',
   'vbo/vbo_primitive_restart.c',
-  'vbo/vbo_rebase.c',
   'vbo/vbo_save_api.c',
   'vbo/vbo_save.c',
   'vbo/vbo_save_draw.c',
@@ -366,6 +365,7 @@ files_libmesa_classic = files(
   'tnl/tnl.h',
   'tnl/t_pipeline.c',
   'tnl/t_pipeline.h',
+  'tnl/t_rebase.c',
   'tnl/t_vb_cliptmp.h',
   'tnl/t_vb_fog.c',
   'tnl/t_vb_light.c',
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index 9fca4da1f4..c19d77d641 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -38,6 +38,7 @@
 #include "util/half_float.h"
 
 #include "t_context.h"
+#include "t_rebase.h"
 #include "tnl.h"
 
 
@@ -461,9 +462,9 @@ void _tnl_draw_prims(struct gl_context *ctx,
if (min_index) {
   /* We always translate away calls with min_index != 0. 
*/
-  vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, 
-   min_index, max_index,
-   _tnl_draw_prims );
+  t_rebase_prims( ctx, arrays, prim, nr_prims, ib,
+  min_index, max_index,
+  _tnl_draw_prims );
   return;
}
else if ((GLint)max_index + max_basevertex > max) {
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/tnl/t_rebase.c
similarity index 94%
rename from src/mesa/vbo/vbo_rebase.c
rename to src/mesa/tnl/t_rebase.c
index 02dbc68dcb..b781781cb0 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/tnl/t_rebase.c
@@ -51,7 +51,7 @@
 #include "main/imports.h"
 #include "main/mtypes.h"
 
-#include "vbo.h"
+#include "t_rebase.h"
 
 
 #define REBASE(TYPE)   \
@@ -100,14 +100,14 @@ REBASE(GLubyte)
  *- can't save time by trying to upload half a vbo - typically it is
  *  all or nothing.
  */
-void vbo_rebase_prims( struct gl_context *ctx,
-  const struct gl_vertex_array *arrays[],
-  const struct _mesa_prim *prim,
-  GLuint nr_prims,
-  const struct _mesa_index_buffer *ib,
-  GLuint min_index,
-  GLuint max_index,
-  vbo_draw_func draw )
+void t_rebase_prims( struct gl_context *ctx,
+ const struct gl_vertex_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index,
+ vbo_draw_func draw )
 {
struct gl_vertex_array tmp_arrays[VERT_ATTRIB_MAX];
const struct gl_vertex_array *tmp_array_pointers[VERT_ATTRIB_MAX];
diff --git a/src/mesa/tnl/t_rebase.h b/src/mesa/tnl/t_rebase.h
new file mode 100644
index 00..35175868d5
--- /dev/null
+++ b/src/mesa/tnl/t_rebase.h
@@ -0,0 +1,39 @@
+/*
+ * mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to 

Re: [Mesa-dev] [PATCH] r600/atomic: fix ATOMCAS instruction.

2018-02-05 Thread Roland Scheidegger
Am 05.02.2018 um 07:47 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> This has 3 srcs.
Depends on how you count :-).

Reviewed-by: Roland Scheidegger 

> 
> This fixes:
> KHR-GL45.shader_atomic_counter_ops_tests.ShaderAtomicCounterOpsExchangeTestCase
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/r600_shader.c | 32 +++-
>  1 file changed, 31 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 33eb5accea..4c0d554d1a 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -8698,6 +8698,33 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx 
> *ctx)
>   if (r)
>   return r;
>  
> + if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET) {
> + if (inst->Src[3].Register.File == TGSI_FILE_IMMEDIATE) {
> + int value = (ctx->literals[4 * 
> inst->Src[3].Register.Index + inst->Src[3].Register.SwizzleX]);
> + memset(, 0, sizeof(struct r600_bytecode_alu));
> + alu.op = ALU_OP1_MOV;
> + alu.dst.sel = ctx->temp_reg;
> + alu.dst.chan = is_cm ? 2 : 1;
> + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
> + alu.src[0].value = value;
> + alu.last = 1;
> + alu.dst.write = 1;
> + r = r600_bytecode_add_alu(ctx->bc, );
> + if (r)
> + return r;
> + } else {
> + memset(, 0, sizeof(struct r600_bytecode_alu));
> + alu.op = ALU_OP1_MOV;
> + alu.dst.sel = ctx->temp_reg;
> + alu.dst.chan = is_cm ? 2 : 1;
> + r600_bytecode_src([0], >src[3], 0);
> + alu.last = 1;
> + alu.dst.write = 1;
> + r = r600_bytecode_add_alu(ctx->bc, );
> + if (r)
> + return r;
> + }
> + }
>   if (inst->Src[2].Register.File == TGSI_FILE_IMMEDIATE) {
>   int value = (ctx->literals[4 * inst->Src[2].Register.Index + 
> inst->Src[2].Register.SwizzleX]);
>   int abs_value = abs(value);
> @@ -8737,7 +8764,10 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx 
> *ctx)
>   gds.src_gpr2 = 0;
>   gds.src_sel_x = is_cm ? 0 : 4;
>   gds.src_sel_y = is_cm ? 1 : 0;
> - gds.src_sel_z = 7;
> + if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET)
> + gds.src_sel_z = is_cm ? 2 : 1;
> + else
> + gds.src_sel_z = 7;
>   gds.dst_sel_x = 0;
>   gds.dst_sel_y = 7;
>   gds.dst_sel_z = 7;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] r600: overhaul buffer resource query.

2018-02-05 Thread Roland Scheidegger
Am 06.02.2018 um 06:04 schrieb Dave Airlie:
> On 6 February 2018 at 14:12, Roland Scheidegger  wrote:
>> Am 05.02.2018 um 05:29 schrieb Dave Airlie:
>>> From: Dave Airlie 
>>>
>>> This cleans up and fixes the previous fix even more.
>>>
>>> Buffers from textures start at max const,
>>> buffers from buffers/images come in from the 168 offset.
>>>
>>> This fixes a bunch of:
>>> KHR-GL45.shader_storage_buffer_object*
>>>
>>> Signed-off-by: Dave Airlie 
>>> ---
>>>  src/gallium/drivers/r600/r600_shader.c | 15 ---
>>>  1 file changed, 8 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/r600/r600_shader.c 
>>> b/src/gallium/drivers/r600/r600_shader.c
>>> index 8c4460a5d5..32f24c071d 100644
>>> --- a/src/gallium/drivers/r600/r600_shader.c
>>> +++ b/src/gallium/drivers/r600/r600_shader.c
>>> @@ -7007,7 +7007,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx 
>>> *ctx, boolean src_requires_l
>>>   return 0;
>>>  }
>>>
>>> -static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, 
>>> int offset)
>>> +static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, 
>>> int offset, int eg_buffer_base)
>>
>> I think it would be nicer if you'd just stick to the offset parameter
>> here. Just add both together in the caller - as far as this function is
>> concerned two offsets don't really make sense.
> 
> 
> Well it matters for the r600 path if I'm not mistaken. It just wants
> id + offset, not the MAX_CONST or
> other bits.
> 

Ah yes, right.
Maybe things would be more obvious if r600_do_buffer_txq would just
figure out offsets on its own? It already knows what the src reg is, so
determining the offset for sampler/buffer/image could easily be done there.
But as long as it works...

Roland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600/sb/cayman: fix indirect ubo access on cayman

2018-02-05 Thread Roland Scheidegger
Am 05.02.2018 um 07:05 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> With sb enabled on cayman, this was overwriting the proper
> cf index value with random ones if the dst gpr was 2 or 3,
> only save the value for a MOVA instruction.
> 
> Fixes:
> KHR-GL45.gpu_shader5.uniform_blocks_array_indexing
> (on cayman with sb)
> 
> Signed-off-by: Dave Airlie 

Reviewed-by: Roland Scheidegger 
> ---
>  src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp 
> b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> index 970e4141d5..87035ee2a6 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> @@ -567,7 +567,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, 
> alu_group_node *g) {
>   n->src.push_back(get_cf_index_value(1));
>   }
>  
> - if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr 
> == CM_V_SQ_MOVA_DST_CF_IDX1) &&
> + if ((flags & AF_MOVA) && (n->bc.dst_gpr == 
> CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
>   ctx.is_cayman())
>   // Move CF_IDX value into tex instruction operands, 
> scheduler will later re-emit setting of CF_IDX
>   save_set_cf_index(n->src[0], n->bc.dst_gpr == 
> CM_V_SQ_MOVA_DST_CF_IDX1);
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] mesa: add xbgr support adjacent to xrgb

2018-02-05 Thread Ilia Mirkin
On Mon, Feb 5, 2018 at 6:42 AM, Ilia Mirkin  wrote:
> On Mon, Feb 5, 2018 at 6:24 AM, Daniel Stone  wrote:
>> Hi Ilia,
>>
>> On 4 February 2018 at 19:09, Ilia Mirkin  wrote:
>>> One might have split this up into multiple patches, but it's just very
>>> repetitive and similar code.
>>
>> You probably want to add this into gbm_to_dri_image_formats[] inside
>> src/gbm/backends/dri/gbm_dri.c, so Wayland compositors running on KMS
>> and xf86-video-modesetting can also use these modes.
>>
>> Grepping for XRGB210 inside src/egl/drivers/dri2/platform_wayland.c
>> would also show the fairly obvious points to add it there.
>
> I thought I covered all the places where this was done, but obviously
> not. Perhaps in my euphoria of getting glxgears displaying the correct
> colors I decided I was done.
>
> I'll do another passthrough, and definitely hit that file explicitly.

So I've been looking at platform_wayland and platform_x11, and it's
not totally clear to me how to make it all work. I'm stuck with e.g.

dri2_wl_create_window_surface
dri3_create_image_khr_pixmap

and others, wrt how to determine whether I should pick a XBGR or XRGB
format. I suspect that more information has to be stored somewhere.
Perhaps the _EGLConfig? Even that wouldn't cover everything though.
This is all quite foreign to me, and I'm not sure how it's supposed to
all fit together. Perhaps it can be left until later?

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600: fixup sparse color exports.

2018-02-05 Thread Roland Scheidegger
Am 05.02.2018 um 05:58 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> If we have gaps in the shader mask we have to have 0x1 in them
> according to a comment in radeonsi, and this is required to fix
> the test at least on cayman.
> 
> We also need to record the highest one written to write to the
> ps exports reg.
> 
> This fixes:
> KHR-GL45.enhanced_layouts.fragment_data_location_api

Does that mean there's actually a performance benefit when there are no
gaps? That's something the APIs wouldn't tell you :-).

Reviewed-by: Roland Scheidegger 

> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/evergreen_state.c |  2 +-
>  src/gallium/drivers/r600/r600_shader.c | 10 ++
>  src/gallium/drivers/r600/r600_shader.h |  1 +
>  3 files changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index 4c9163c2a7..742ca5babb 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -3369,7 +3369,7 @@ void evergreen_update_ps_state(struct pipe_context 
> *ctx, struct r600_pipe_shader
>   exports_ps |= 1;
>   }
>  
> - num_cout = rshader->nr_ps_color_exports;
> + num_cout = rshader->ps_export_highest + 1;
>  
>   exports_ps |= S_02884C_EXPORT_COLORS(num_cout);
>   if (!exports_ps) {
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 72e3063804..33eb5accea 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -3876,6 +3876,16 @@ static int r600_shader_from_tgsi(struct r600_context 
> *rctx,
>   output[j].type = 
> V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
>   shader->nr_ps_color_exports++;
>   shader->ps_color_export_mask |= (0xf << 
> (shader->output[i].sid * 4));
> +
> + /* If the i-th target format is set, 
> all previous target formats must
> +  * be non-zero to avoid hangs. - from 
> radeonsi, seems to apply to eg as well.
> +  */
> + if (shader->output[i].sid > 0)
> + for (unsigned x = 0; x < 
> shader->output[i].sid; x++)
> + 
> shader->ps_color_export_mask |= (1 << (x*4));
> +
> + if (shader->output[i].sid > 
> shader->ps_export_highest)
> + shader->ps_export_highest = 
> shader->output[i].sid;
>   if (shader->fs_write_all && 
> (rscreen->b.chip_class >= EVERGREEN)) {
>   for (k = 1; k < 
> max_color_exports; k++) {
>   j++;
> diff --git a/src/gallium/drivers/r600/r600_shader.h 
> b/src/gallium/drivers/r600/r600_shader.h
> index 7fca3f455e..4b23facf6f 100644
> --- a/src/gallium/drivers/r600/r600_shader.h
> +++ b/src/gallium/drivers/r600/r600_shader.h
> @@ -85,6 +85,7 @@ struct r600_shader {
>   /* Real number of ps color exports compiled in the bytecode */
>   unsignednr_ps_color_exports;
>   unsignedps_color_export_mask;
> + unsignedps_export_highest;
>   /* bit n is set if the shader writes gl_ClipDistance[n] */
>   unsignedcc_dist_mask;
>   unsignedclip_dist_write;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] r600: overhaul buffer resource query.

2018-02-05 Thread Dave Airlie
On 6 February 2018 at 14:12, Roland Scheidegger  wrote:
> Am 05.02.2018 um 05:29 schrieb Dave Airlie:
>> From: Dave Airlie 
>>
>> This cleans up and fixes the previous fix even more.
>>
>> Buffers from textures start at max const,
>> buffers from buffers/images come in from the 168 offset.
>>
>> This fixes a bunch of:
>> KHR-GL45.shader_storage_buffer_object*
>>
>> Signed-off-by: Dave Airlie 
>> ---
>>  src/gallium/drivers/r600/r600_shader.c | 15 ---
>>  1 file changed, 8 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_shader.c 
>> b/src/gallium/drivers/r600/r600_shader.c
>> index 8c4460a5d5..32f24c071d 100644
>> --- a/src/gallium/drivers/r600/r600_shader.c
>> +++ b/src/gallium/drivers/r600/r600_shader.c
>> @@ -7007,7 +7007,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx 
>> *ctx, boolean src_requires_l
>>   return 0;
>>  }
>>
>> -static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int 
>> offset)
>> +static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int 
>> offset, int eg_buffer_base)
>
> I think it would be nicer if you'd just stick to the offset parameter
> here. Just add both together in the caller - as far as this function is
> concerned two offsets don't really make sense.


Well it matters for the r600 path if I'm not mistaken. It just wants
id + offset, not the MAX_CONST or
other bits.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] ac: add 64bit support to ac_find_lsb()

2018-02-05 Thread Timothy Arceri
---
 src/amd/common/ac_llvm_build.c | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 08c488775e..0764d8c7f9 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1984,6 +1984,20 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
 LLVMTypeRef dst_type,
 LLVMValueRef src0)
 {
+   unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+   const char *intrin_name;
+   LLVMTypeRef type;
+   LLVMValueRef zero;
+   if (src0_bitsize == 64) {
+   intrin_name = "llvm.cttz.i64";
+   type = ctx->i64;
+   zero = ctx->i64_0;
+   } else {
+   intrin_name = "llvm.cttz.i32";
+   type = ctx->i32;
+   zero = ctx->i32_0;
+   }
+
LLVMValueRef params[2] = {
src0,
 
@@ -1999,15 +2013,19 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
LLVMConstInt(ctx->i1, 1, false),
};
 
-   LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32,
+   LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type,
  params, 2,
  AC_FUNC_ATTR_READNONE);
 
+   if (src0_bitsize == 64) {
+   lsb = ac_unpack_64_2x32_split_x(ctx, lsb);
+   }
+
/* TODO: We need an intrinsic to skip this conditional. */
/* Check for zero: */
return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
   LLVMIntEQ, src0,
-  ctx->i32_0, ""),
+  zero, ""),
   LLVMConstInt(ctx->i32, -1, 0), lsb, "");
 }
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] ac: create ac_unpack_64_2x32_split_x() helper

2018-02-05 Thread Timothy Arceri
This will be used in the following commits.
---
 src/amd/common/ac_llvm_build.c  | 8 
 src/amd/common/ac_llvm_build.h  | 3 +++
 src/amd/common/ac_nir_to_llvm.c | 6 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index a86ba962fa..6375b106f7 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1992,6 +1992,14 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
   LLVMConstInt(ctx->i32, -1, 0), lsb, "");
 }
 
+LLVMValueRef ac_unpack_64_2x32_split_x(struct ac_llvm_context *ctx,
+  LLVMValueRef src0)
+{
+   LLVMValueRef tmp = LLVMBuildBitCast(ctx->builder, src0,
+   ctx->v2i32, "");
+   return LLVMBuildExtractElement(ctx->builder, tmp, ctx->i32_0, "");
+}
+
 LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
 {
return LLVMPointerType(LLVMArrayType(elem_type, 0),
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 47c843fb4b..78991b3e99 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -348,6 +348,9 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
 LLVMTypeRef dst_type,
 LLVMValueRef src0);
 
+LLVMValueRef ac_unpack_64_2x32_split_x(struct ac_llvm_context *ctx,
+  LLVMValueRef src0);
+
 LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
 
 #ifdef __cplusplus
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9a9db2dce9..ac4af12b3e 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2042,11 +2042,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
 
case nir_op_unpack_64_2x32_split_x: {
assert(instr->src[0].src.ssa->num_components == 1);
-   LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
-   ctx->ac.v2i32,
-   "");
-   result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
-ctx->ac.i32_0, "");
+   result = ac_unpack_64_2x32_split_x(>ac, src[0]);
break;
}
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] ac: move get_elem_bits() to ac_llvm_build.c

2018-02-05 Thread Timothy Arceri
---
 src/amd/common/ac_llvm_build.c  | 19 +++
 src/amd/common/ac_llvm_build.h  |  3 +++
 src/amd/common/ac_nir_to_llvm.c | 34 --
 3 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 6375b106f7..08c488775e 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -128,6 +128,25 @@ ac_llvm_extract_elem(struct ac_llvm_context *ac,
   LLVMConstInt(ac->i32, index, false), "");
 }
 
+int
+ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
+{
+   if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
+   type = LLVMGetElementType(type);
+
+   if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
+   return LLVMGetIntTypeWidth(type);
+
+   if (type == ctx->f16)
+   return 16;
+   if (type == ctx->f32)
+   return 32;
+   if (type == ctx->f64)
+   return 64;
+
+   unreachable("Unhandled type kind in get_elem_bits");
+}
+
 unsigned
 ac_get_type_size(LLVMTypeRef type)
 {
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 78991b3e99..fa09bd10a5 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -92,6 +92,9 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
LLVMContextRef context,
 int
 ac_get_llvm_num_components(LLVMValueRef value);
 
+int
+ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
+
 LLVMValueRef
 ac_llvm_extract_elem(struct ac_llvm_context *ac,
 LLVMValueRef value,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e06a22f8a9..e284795fdc 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -329,24 +329,6 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef 
module,
return main_function;
 }
 
-static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
-{
-   if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
-   type = LLVMGetElementType(type);
-
-   if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
-   return LLVMGetIntTypeWidth(type);
-
-   if (type == ctx->f16)
-   return 16;
-   if (type == ctx->f32)
-   return 32;
-   if (type == ctx->f64)
-   return 64;
-
-   unreachable("Unhandled type kind in get_elem_bits");
-}
-
 static LLVMValueRef unpack_param(struct ac_llvm_context *ctx,
 LLVMValueRef param, unsigned rshift,
 unsigned bitwidth)
@@ -1267,7 +1249,7 @@ static LLVMValueRef emit_intrin_1f_param(struct 
ac_llvm_context *ctx,
};
 
MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", 
intrin,
-get_elem_bits(ctx, 
result_type));
+ac_get_elem_bits(ctx, 
result_type));
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 1, 
AC_FUNC_ATTR_READNONE);
 }
@@ -1284,7 +1266,7 @@ static LLVMValueRef emit_intrin_2f_param(struct 
ac_llvm_context *ctx,
};
 
MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", 
intrin,
-get_elem_bits(ctx, 
result_type));
+ac_get_elem_bits(ctx, 
result_type));
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 2, 
AC_FUNC_ATTR_READNONE);
 }
@@ -1302,7 +1284,7 @@ static LLVMValueRef emit_intrin_3f_param(struct 
ac_llvm_context *ctx,
};
 
MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", 
intrin,
-get_elem_bits(ctx, 
result_type));
+ac_get_elem_bits(ctx, 
result_type));
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 3, 
AC_FUNC_ATTR_READNONE);
 }
@@ -1922,7 +1904,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
result = ac_build_intrinsic(>ac, "llvm.bitreverse.i32", 
ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
break;
case nir_op_bit_count:
-   if (get_elem_bits(>ac, LLVMTypeOf(src[0])) == 32)
+   if (ac_get_elem_bits(>ac, LLVMTypeOf(src[0])) == 32)
result = ac_build_intrinsic(>ac, "llvm.ctpop.i32", 
ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
else {
result = ac_build_intrinsic(>ac, "llvm.ctpop.i64", 
ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE);
@@ -1966,7 +1948,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
case nir_op_u2u32:

[Mesa-dev] [PATCH 2/4] ac: add 64bit bitCount support

2018-02-05 Thread Timothy Arceri
---
 src/amd/common/ac_nir_to_llvm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ac4af12b3e..e06a22f8a9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1922,7 +1922,12 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
result = ac_build_intrinsic(>ac, "llvm.bitreverse.i32", 
ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
break;
case nir_op_bit_count:
-   result = ac_build_intrinsic(>ac, "llvm.ctpop.i32", 
ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
+   if (get_elem_bits(>ac, LLVMTypeOf(src[0])) == 32)
+   result = ac_build_intrinsic(>ac, "llvm.ctpop.i32", 
ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
+   else {
+   result = ac_build_intrinsic(>ac, "llvm.ctpop.i64", 
ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE);
+   result = ac_unpack_64_2x32_split_x(>ac, result);
+   }
break;
case nir_op_vec2:
case nir_op_vec3:
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 9/9] r600: work out target mask at framebuffer bind.

2018-02-05 Thread Roland Scheidegger
For 7-9/9
Reviewed-by: Roland Scheidegger 

Am 05.02.2018 um 05:29 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> If we only get 1,2,3,6 framebuffers we want a sparse target mask.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/evergreen_state.c | 10 +++---
>  src/gallium/drivers/r600/r600_pipe.h   |  1 +
>  src/gallium/drivers/r600/r600_state.c  |  2 +-
>  3 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index f8042c21c0..4c9163c2a7 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1436,7 +1436,7 @@ static void evergreen_set_framebuffer_state(struct 
> pipe_context *ctx,
>   struct r600_surface *surf;
>   struct r600_texture *rtex;
>   uint32_t i, log_samples;
> -
> + uint32_t target_mask = 0;
>   /* Flush TC when changing the framebuffer state, because the only
>* client not using TC that can change textures is the framebuffer.
>* Other places don't typically have to flush TC.
> @@ -1463,6 +1463,8 @@ static void evergreen_set_framebuffer_state(struct 
> pipe_context *ctx,
>   if (!surf)
>   continue;
>  
> + target_mask |= (0xf << (i * 4));
> +
>   rtex = (struct r600_texture*)surf->base.texture;
>  
>   r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
> @@ -1528,7 +1530,9 @@ static void evergreen_set_framebuffer_state(struct 
> pipe_context *ctx,
>   r600_mark_atom_dirty(rctx, >db_misc_state.atom);
>   }
>  
> - if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
> + if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs ||
> + rctx->cb_misc_state.bound_cbufs_target_mask != target_mask) {
> + rctx->cb_misc_state.bound_cbufs_target_mask = target_mask;
>   rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
>   r600_mark_atom_dirty(rctx, >cb_misc_state.atom);
>   }
> @@ -2025,7 +2029,7 @@ static void evergreen_emit_cb_misc_state(struct 
> r600_context *rctx, struct r600_
>  {
>   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
>   struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
> - unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
> + unsigned fb_colormask = a->bound_cbufs_target_mask;
>   unsigned ps_colormask = a->ps_color_export_mask;
>   unsigned rat_colormask = evergreen_construct_rat_mask(rctx, a, 
> a->nr_cbufs);
>   radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
> diff --git a/src/gallium/drivers/r600/r600_pipe.h 
> b/src/gallium/drivers/r600/r600_pipe.h
> index 9b94f3654c..9caf3b8512 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -152,6 +152,7 @@ struct r600_cb_misc_state {
>   unsigned cb_color_control; /* this comes from blend state */
>   unsigned blend_colormask; /* 8*4 bits for 8 RGBA colorbuffers */
>   unsigned nr_cbufs;
> + unsigned bound_cbufs_target_mask;
>   unsigned nr_ps_color_outputs;
>   unsigned ps_color_export_mask;
>   unsigned image_rat_enabled_mask;
> diff --git a/src/gallium/drivers/r600/r600_state.c 
> b/src/gallium/drivers/r600/r600_state.c
> index 6ff8037d9c..5cf99c18b6 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -1525,7 +1525,7 @@ static void r600_emit_cb_misc_state(struct r600_context 
> *rctx, struct r600_atom
>   }
>   radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, 
> a->cb_color_control);
>   } else {
> - unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 
> 1;
> + unsigned fb_colormask = a->bound_cbufs_target_mask;
>   unsigned ps_colormask = a->ps_color_export_mask;
>   unsigned multiwrite = a->multiwrite && a->nr_cbufs > 1;
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 102032] nir_op_imod is incorrectly implemented as LLVM's srem

2018-02-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=102032

--- Comment #2 from programmerj...@gmail.com ---
(In reply to Bas Nieuwenhuizen from comment #1)
> I went looking to why there were no good CTS tests for this and found this
> in the vulkan spec:
> 
> For the OpSRem and OpSMod instructions, if either operand is negative the
> result is undefined.

I think this bug should be fixed to support OpenCL. I have not found any
references in the OpenCL specs to results of the remainder operator, so I'm
guessing it uses the definition eventually derived from C99 section 6.5.5.6
which defines the results for negative operands.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 8/9] r600: work out shader export mask at shader build time

2018-02-05 Thread Roland Scheidegger
Am 05.02.2018 um 05:29 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> Since enhanced layouts allows setting specific MRT outputs, we
> can get sparse outputs, so we have to calculate the shader
> mask earlier.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/evergreen_state.c   | 3 ++-
>  src/gallium/drivers/r600/r600_pipe.h | 1 +
>  src/gallium/drivers/r600/r600_shader.c   | 3 +++
>  src/gallium/drivers/r600/r600_shader.h   | 3 +++
>  src/gallium/drivers/r600/r600_state.c| 2 +-
>  src/gallium/drivers/r600/r600_state_common.c | 1 +
>  6 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index 11e473d604..f8042c21c0 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -2026,7 +2026,7 @@ static void evergreen_emit_cb_misc_state(struct 
> r600_context *rctx, struct r600_
>   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
>   struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
>   unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
> - unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 
> 4)) - 1;
> + unsigned ps_colormask = a->ps_color_export_mask;
>   unsigned rat_colormask = evergreen_construct_rat_mask(rctx, a, 
> a->nr_cbufs);
>   radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
>   radeon_emit(cs, (a->blend_colormask & fb_colormask) | rat_colormask); 
> /* R_028238_CB_TARGET_MASK */
> @@ -3373,6 +3373,7 @@ void evergreen_update_ps_state(struct pipe_context 
> *ctx, struct r600_pipe_shader
>   exports_ps = 2;
>   }
>   shader->nr_ps_color_outputs = num_cout;
> + shader->ps_color_export_mask = rshader->ps_color_export_mask;
>   if (ninterp == 0) {
>   ninterp = 1;
>   have_perspective = TRUE;
> diff --git a/src/gallium/drivers/r600/r600_pipe.h 
> b/src/gallium/drivers/r600/r600_pipe.h
> index 0b772b2599..9b94f3654c 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -153,6 +153,7 @@ struct r600_cb_misc_state {
>   unsigned blend_colormask; /* 8*4 bits for 8 RGBA colorbuffers */
>   unsigned nr_cbufs;
>   unsigned nr_ps_color_outputs;
> + unsigned ps_color_export_mask;
>   unsigned image_rat_enabled_mask;
>   unsigned buffer_rat_enabled_mask;
>   bool multiwrite;
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 893a71b915..9984e783b5 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -3875,6 +3875,7 @@ static int r600_shader_from_tgsi(struct r600_context 
> *rctx,
>   output[j].array_base = 
> shader->output[i].sid;
>   output[j].type = 
> V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
>   shader->nr_ps_color_exports++;
> + shader->ps_color_export_mask |= (0xf << 
> (shader->output[i].sid * 4));
>   if (shader->fs_write_all && 
> (rscreen->b.chip_class >= EVERGREEN)) {
>   for (k = 1; k < 
> max_color_exports; k++) {
>   j++;
> @@ -3890,6 +3891,7 @@ static int r600_shader_from_tgsi(struct r600_context 
> *rctx,
>   output[j].op = 
> CF_OP_EXPORT;
>   output[j].type = 
> V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
>   
> shader->nr_ps_color_exports++;
> + 
> shader->ps_color_export_mask |= (0xf << (j * 4));
>   }
>   }
>   } else if (shader->output[i].name == 
> TGSI_SEMANTIC_POSITION) {
> @@ -3978,6 +3980,7 @@ static int r600_shader_from_tgsi(struct r600_context 
> *rctx,
>   output[j].op = CF_OP_EXPORT;
>   j++;
>   shader->nr_ps_color_exports++;
> + shader->ps_color_export_mask = 0xf;
>   }
>  
>   noutput = j;
> diff --git a/src/gallium/drivers/r600/r600_shader.h 
> b/src/gallium/drivers/r600/r600_shader.h
> index da96688e54..7fca3f455e 100644
> --- a/src/gallium/drivers/r600/r600_shader.h
> +++ b/src/gallium/drivers/r600/r600_shader.h
> @@ -84,6 +84,7 @@ struct r600_shader {
>   unsignednr_ps_max_color_exports;
>   /* Real number of ps color exports compiled in the bytecode */
>   unsignednr_ps_color_exports;

Re: [Mesa-dev] [PATCH 6/9] r600/compute: only mark buffer/image state dirty for fragment shaders

2018-02-05 Thread Roland Scheidegger
Am 05.02.2018 um 05:29 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> The compute emission path always emits this currently, and emitting
> it on the fragment path breaks the blitter.
> 
> This fixes gpu hangs in KHR-GL45.compute_shader.resource-texture
> 
> Signed-off-by: Dave Airlie 

I have some feeling things would be more robust if an atom must not be
emitted in some cases, then the atom emit code should take care of it,
rather than relying on not setting it dirty.
Albeit since compute does not actually really use the ordinary atom
list, maybe there should be a separate atom list really? Seems like
dirty handling for compute in general could need some improvement.
In any case, I suppose that'll have to do for now, so
for 4-6/9
Reviewed-by: Roland Scheidegger 

> ---
>  src/gallium/drivers/r600/evergreen_state.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index 0999cc5de8..11e473d604 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -4062,7 +4062,8 @@ static void evergreen_set_shader_buffers(struct 
> pipe_context *ctx,
>   r600_mark_atom_dirty(rctx, >cb_misc_state.atom);
>   }
>  
> - r600_mark_atom_dirty(rctx, >atom);
> + if (shader == PIPE_SHADER_FRAGMENT)
> + r600_mark_atom_dirty(rctx, >atom);
>  }
>  
>  static void evergreen_set_shader_images(struct pipe_context *ctx,
> @@ -4238,7 +4239,8 @@ static void evergreen_set_shader_images(struct 
> pipe_context *ctx,
>   r600_mark_atom_dirty(rctx, >cb_misc_state.atom);
>   }
>  
> - r600_mark_atom_dirty(rctx, >atom);
> + if (shader == PIPE_SHADER_FRAGMENT)
> + r600_mark_atom_dirty(rctx, >atom);
>  }
>  
>  static void evergreen_get_pipe_constant_buffer(struct r600_context *rctx,
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Haiku: convert to autotools

2018-02-05 Thread kallisti5

On 2018-02-05 18:13, Dylan Baker wrote:


Pretty close. I lied, apparently the pthreads fix is in 0.44

https://github.com/mesonbuild/meson/commit/fc547ad05e5a8e650ae5bc2ecc7d40e4dbcc9f0f

Here's my branch, but it needs rebase pretty bad, there's also a patch 
to use
shared glapi that I added trying to see if that would get the build 
working that

needs to be removed:

https://github.com/dcbaker/mesa/tree/wip/meson-haiku



Ok. I went over and got Haiku building with meson with the following 
change:


https://github.com/kallisti5/mesa/commit/e33dfab54d99edacdf1d24c402d29f50818631b3

Any feedback welcome. I need to review your branch now and see if I can 
make improvements

based on your changes.

 Thanks!

 -- Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] r600: overhaul buffer resource query.

2018-02-05 Thread Roland Scheidegger
Am 05.02.2018 um 05:29 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> This cleans up and fixes the previous fix even more.
> 
> Buffers from textures start at max const,
> buffers from buffers/images come in from the 168 offset.
> 
> This fixes a bunch of:
> KHR-GL45.shader_storage_buffer_object*
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/r600_shader.c | 15 ---
>  1 file changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 8c4460a5d5..32f24c071d 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -7007,7 +7007,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx 
> *ctx, boolean src_requires_l
>   return 0;
>  }
>  
> -static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int 
> offset)
> +static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int 
> offset, int eg_buffer_base)

I think it would be nicer if you'd just stick to the offset parameter
here. Just add both together in the caller - as far as this function is
concerned two offsets don't really make sense.

Other than that, and for 1-3/9
Reviewed-by: Roland Scheidegger 


>  {
>   struct tgsi_full_instruction *inst = 
> >parse.FullToken.FullInstruction;
>   int r;
> @@ -7033,7 +7033,7 @@ static int r600_do_buffer_txq(struct r600_shader_ctx 
> *ctx, int reg_idx, int offs
>   struct r600_bytecode_vtx vtx;
>   memset(, 0, sizeof(vtx));
>   vtx.op = FETCH_OP_GET_BUFFER_RESINFO;
> - vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
> + vtx.buffer_id = id + eg_buffer_base;
>   vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
>   vtx.src_gpr = 0;
>   vtx.mega_fetch_count = 16; /* no idea here really... */
> @@ -7107,7 +7107,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>   if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
>   if (ctx->bc->chip_class < EVERGREEN)
>   ctx->shader->uses_tex_buffers = true;
> - return r600_do_buffer_txq(ctx, 1, 0);
> + return r600_do_buffer_txq(ctx, 1, 0, 
> R600_MAX_CONST_BUFFERS);
>   }
>   else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
>   if (ctx->bc->chip_class < EVERGREEN)
> @@ -8821,10 +8821,11 @@ static int tgsi_resq(struct r600_shader_ctx *ctx)
>   (inst->Src[0].Register.File == TGSI_FILE_IMAGE && 
> inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) {
>   if (ctx->bc->chip_class < EVERGREEN)
>   ctx->shader->uses_tex_buffers = true;
> - unsigned offset = 0;
> - if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
> - offset += R600_IMAGE_REAL_RESOURCE_OFFSET - 
> R600_MAX_CONST_BUFFERS + ctx->shader->image_size_const_offset;
> - return r600_do_buffer_txq(ctx, 0, offset);
> + unsigned eg_buffer_base = 0;
> + eg_buffer_base = R600_IMAGE_REAL_RESOURCE_OFFSET;
> + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
> + eg_buffer_base += ctx->info.file_count[TGSI_FILE_IMAGE];
> + return r600_do_buffer_txq(ctx, 0, 
> ctx->shader->image_size_const_offset, eg_buffer_base);
>   }
>  
>   if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY &&
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir: remove the abs call in is_neg_power_of_two

2018-02-05 Thread Vlad Golovkin
val->i32[swizzle[i]] is guaranteed to have non-positive value before the
__is_power_of_two call, so unary minus is equivalent to abs in this case.
---
 src/compiler/nir/nir_search_helpers.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_search_helpers.h 
b/src/compiler/nir/nir_search_helpers.h
index 2e3bd137d6..66e1546ae6 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -80,7 +80,7 @@ is_neg_power_of_two(nir_alu_instr *instr, unsigned src, 
unsigned num_components,
   case nir_type_int:
  if (val->i32[swizzle[i]] > 0)
 return false;
- if (!__is_power_of_two(abs(val->i32[swizzle[i]])))
+ if (!__is_power_of_two(-val->i32[swizzle[i]]))
 return false;
  break;
   default:
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking

2018-02-05 Thread Nanley Chery
On Mon, Feb 05, 2018 at 06:05:59PM -0800, Jason Ekstrand wrote:
> On Mon, Feb 5, 2018 at 5:41 PM, Nanley Chery  wrote:
> 
> > On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote:
> > > This commit completely reworks aux tracking.  This includes a number of
> > > somewhat distinct changes:
> > >
> > >  1) Since we are no longer fast-clearing multiple slices, we only need
> > > to track one fast clear color and one fast clear type.
> > >
> > >  2) We store two bits for fast clear instead of one to let us
> > > distinguish between zero and non-zero fast clear colors.  This is
> > > needed so that we can do full resolves when transitioning to
> > > PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear
> > > values in all sorts of places we wouldn't normally.
> > >
> > >  3) We now track compression state as a boolean separate from fast clear
> > > type and this is tracked on a per-slice granularity.
> > >
> > > The previous scheme had some issues when it came to individual slices of
> > > a multi-LOD image.  In particular, we only tracked "needs resolve"
> > > per-LOD but you could do a vkCmdPipelineBarrier that would only resolve
> > > a portion of the image and would set "needs resolve" to false anyway.
> > > Also, any transition from an undefined layout would reset the clear
> > > color for the entire LOD regardless of whether or not there was some
> > > clear color on some other slice.
> > >
> > > As far as full/partial resolves go, the assumptions of the previous
> > > scheme held because the one case where we do need a full resolve when
> > > CCS_E is enabled is for window-system images.  Since we only ever
> > > allowed X-tiled window-system images, CCS was entirely disabled on gen9+
> > > and we never got CCS_E.  With the advent of Y-tiled window-system
> > > buffers, we now need to properly support doing a full resolve of images
> > > marked CCS_E.
> > > ---
> > >  src/intel/vulkan/anv_blorp.c   |   3 +-
> > >  src/intel/vulkan/anv_image.c   |  96 ++-
> > >  src/intel/vulkan/anv_private.h |  53 +++---
> > >  src/intel/vulkan/genX_cmd_buffer.c | 340 +++---
> > ---
> > >  4 files changed, 331 insertions(+), 161 deletions(-)
> > >
> > > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> > > index 3698543..594b0d8 100644
> > > --- a/src/intel/vulkan/anv_blorp.c
> > > +++ b/src/intel/vulkan/anv_blorp.c
> > > @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> > > * particular value and don't care about format or clear value.
> > > */
> > >const struct anv_address clear_color_addr =
> > > - anv_image_get_clear_color_addr(cmd_buffer->device, image,
> > > -aspect, level);
> > > + anv_image_get_clear_color_addr(cmd_buffer->device, image,
> > aspect);
> > >surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
> > > }
> > >
> > > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> > > index 94b9ecb..d5f8dcf 100644
> > > --- a/src/intel/vulkan/anv_image.c
> > > +++ b/src/intel/vulkan/anv_image.c
> > > @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct
> > gen_device_info *devinfo,
> > >   * fast-clear values in non-trivial cases (e.g., outside of a render
> > pass in
> > >   * which a fast clear has occurred).
> > >   *
> > > - * For the purpose of discoverability, the algorithm used to manage
> > this buffer
> > > - * is described here. A clear value in this buffer is updated when a
> > fast clear
> > > - * is performed on a subresource. One of two synchronization operations
> > is
> > > - * performed in order for a following memory access to use the
> > fast-clear
> > > - * value:
> > > - *a. Copy the value from the buffer to the surface state object
> > used for
> > > - *   reading. This is done implicitly when the value is the clear
> > value
> > > - *   predetermined to be the default in other surface state
> > objects. This
> > > - *   is currently only done explicitly for the operation below.
> > > - *b. Do (a) and use the surface state object to resolve the
> > subresource.
> > > - *   This is only done during layout transitions for decent
> > performance.
> > > + * In order to avoid having multiple clear colors for a single plane of
> > an
> > > + * image (hence a single RENDER_SURFACE_STATE), we only allow
> > fast-clears on
> > > + * the first slice (level 0, layer 0).  At the time of our testing (Jan
> > 17,
> > > + * 2018), there were known applications which would benefit from
> > fast-clearing
> > > + * more than just the first slice.
> > >   *
> > > - * With the above scheme, we can fast-clear whenever the hardware
> > allows except
> > > - * for two cases in which synchronization becomes impossible or
> > undesirable:
> > > - ** The subresource is in the GENERAL layout and is 

[Mesa-dev] [PATCH v3 20/24] anv/cmd_buffer: Rework aux tracking

2018-02-05 Thread Jason Ekstrand
This commit completely reworks aux tracking.  This includes a number of
somewhat distinct changes:

 1) Since we are no longer fast-clearing multiple slices, we only need
to track one fast clear color and one fast clear type.

 2) We store two bits for fast clear instead of one to let us
distinguish between zero and non-zero fast clear colors.  This is
needed so that we can do full resolves when transitioning to
PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear
values in all sorts of places we wouldn't normally.

 3) We now track compression state as a boolean separate from fast clear
type and this is tracked on a per-slice granularity.

The previous scheme had some issues when it came to individual slices of
a multi-LOD image.  In particular, we only tracked "needs resolve"
per-LOD but you could do a vkCmdPipelineBarrier that would only resolve
a portion of the image and would set "needs resolve" to false anyway.
Also, any transition from an undefined layout would reset the clear
color for the entire LOD regardless of whether or not there was some
clear color on some other slice.

As far as full/partial resolves go, the assumptions of the previous
scheme held because the one case where we do need a full resolve when
CCS_E is enabled is for window-system images.  Since we only ever
allowed X-tiled window-system images, CCS was entirely disabled on gen9+
and we never got CCS_E.  With the advent of Y-tiled window-system
buffers, we now need to properly support doing a full resolve of images
marked CCS_E.

v2 (Jason Ekstrand):
 - Fix a bug in the compressed flag offset calculation
 - Treat 3D images as multi-slice for the purposes of resolve tracking

Reviewed-by: Topi Pohjolainen 
---
 src/intel/vulkan/anv_blorp.c   |   3 +-
 src/intel/vulkan/anv_image.c   | 100 ++-
 src/intel/vulkan/anv_private.h |  60 ---
 src/intel/vulkan/genX_cmd_buffer.c | 340 +++--
 4 files changed, 345 insertions(+), 158 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 497ae6f..fc3b717 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1758,8 +1758,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
* particular value and don't care about format or clear value.
*/
   const struct anv_address clear_color_addr =
- anv_image_get_clear_color_addr(cmd_buffer->device, image,
-aspect, level);
+ anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
   surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
}
 
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 11942d0..011e952 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct gen_device_info 
*devinfo,
  * fast-clear values in non-trivial cases (e.g., outside of a render pass in
  * which a fast clear has occurred).
  *
- * For the purpose of discoverability, the algorithm used to manage this buffer
- * is described here. A clear value in this buffer is updated when a fast clear
- * is performed on a subresource. One of two synchronization operations is
- * performed in order for a following memory access to use the fast-clear
- * value:
- *a. Copy the value from the buffer to the surface state object used for
- *   reading. This is done implicitly when the value is the clear value
- *   predetermined to be the default in other surface state objects. This
- *   is currently only done explicitly for the operation below.
- *b. Do (a) and use the surface state object to resolve the subresource.
- *   This is only done during layout transitions for decent performance.
+ * In order to avoid having multiple clear colors for a single plane of an
+ * image (hence a single RENDER_SURFACE_STATE), we only allow fast-clears on
+ * the first slice (level 0, layer 0).  At the time of our testing (Jan 17,
+ * 2018), there were no known applications which would benefit from fast-
+ * clearing more than just the first slice.
  *
- * With the above scheme, we can fast-clear whenever the hardware allows except
- * for two cases in which synchronization becomes impossible or undesirable:
- ** The subresource is in the GENERAL layout and is cleared to a value
- *  other than the special default value.
+ * The fast clear portion of the image is laid out in the following order:
  *
- *  Performing a synchronization operation in order to read from the
- *  subresource is undesirable in this case. Firstly, b) is not an option
- *  because a layout transition isn't required between a write and read of
- *  an image in the GENERAL layout. Secondly, it's undesirable to do a)
- *  explicitly because it would require large infrastructural changes. The
- 

Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking

2018-02-05 Thread Nanley Chery
On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote:
> This commit completely reworks aux tracking.  This includes a number of
> somewhat distinct changes:
> 
>  1) Since we are no longer fast-clearing multiple slices, we only need
> to track one fast clear color and one fast clear type.
> 
>  2) We store two bits for fast clear instead of one to let us
> distinguish between zero and non-zero fast clear colors.  This is
> needed so that we can do full resolves when transitioning to
> PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear
> values in all sorts of places we wouldn't normally.
> 
>  3) We now track compression state as a boolean separate from fast clear
> type and this is tracked on a per-slice granularity.
> 
> The previous scheme had some issues when it came to individual slices of
> a multi-LOD images.  In particular, we only tracked "needs resolve"
> per-LOD but you could do a vkCmdPipelineBarrier that would only resolve
> a portion of the image and would set "needs resolve" to false anyway.
> Also, any transition from an undefined layout would reset the clear
> color for the entire LOD regardless of whether or not there was some
> clear color on some other slice.
> 
> As far as full/partial resolves go, he assumptions of the previous
> scheme held because the one case where we do need a full resolve when
> CCS_E is enabled is for window-system images.  Since we only ever
> allowed X-tiled window-system images, CCS was entirely disabled on gen9+
> and we never got CCS_E.  With the advent of Y-tiled window-system
> buffers, we now need to properly support doing a full resolve of images
> marked CCS_E.
> ---
>  src/intel/vulkan/anv_blorp.c   |   3 +-
>  src/intel/vulkan/anv_image.c   |  96 ++-
>  src/intel/vulkan/anv_private.h |  53 +++---
>  src/intel/vulkan/genX_cmd_buffer.c | 340 
> +++--
>  4 files changed, 331 insertions(+), 161 deletions(-)
> 

Could you send out another rev with the 3D surface changes squashed in?
I was almost about to send review feedback on a bug you've already fixed
with the add-on patch.

Thanks,
Nanley

> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 3698543..594b0d8 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> * particular value and don't care about format or clear value.
> */
>const struct anv_address clear_color_addr =
> - anv_image_get_clear_color_addr(cmd_buffer->device, image,
> -aspect, level);
> + anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
>surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
> }
>  
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 94b9ecb..d5f8dcf 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct 
> gen_device_info *devinfo,
>   * fast-clear values in non-trivial cases (e.g., outside of a render pass in
>   * which a fast clear has occurred).
>   *
> - * For the purpose of discoverability, the algorithm used to manage this 
> buffer
> - * is described here. A clear value in this buffer is updated when a fast 
> clear
> - * is performed on a subresource. One of two synchronization operations is
> - * performed in order for a following memory access to use the fast-clear
> - * value:
> - *a. Copy the value from the buffer to the surface state object used for
> - *   reading. This is done implicitly when the value is the clear value
> - *   predetermined to be the default in other surface state objects. This
> - *   is currently only done explicitly for the operation below.
> - *b. Do (a) and use the surface state object to resolve the subresource.
> - *   This is only done during layout transitions for decent performance.
> + * In order to avoid having multiple clear colors for a single plane of an
> + * image (hence a single RENDER_SURFACE_STATE), we only allow fast-clears on
> + * the first slice (level 0, layer 0).  At the time of our testing (Jan 17,
> + * 2018), there were known applications which would benefit from 
> fast-clearing
> + * more than just the first slice.
>   *
> - * With the above scheme, we can fast-clear whenever the hardware allows 
> except
> - * for two cases in which synchronization becomes impossible or undesirable:
> - ** The subresource is in the GENERAL layout and is cleared to a value
> - *  other than the special default value.
> + * The fast clear portion of the image is laid out in the following order:
>   *
> - *  Performing a synchronization operation in order to read from the
> - *  subresource is undesirable in this case. Firstly, b) is not an option
> - *  because a 

Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking

2018-02-05 Thread Jason Ekstrand
On Mon, Feb 5, 2018 at 5:41 PM, Nanley Chery  wrote:

> On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote:
> > This commit completely reworks aux tracking.  This includes a number of
> > somewhat distinct changes:
> >
> >  1) Since we are no longer fast-clearing multiple slices, we only need
> > to track one fast clear color and one fast clear type.
> >
> >  2) We store two bits for fast clear instead of one to let us
> > distinguish between zero and non-zero fast clear colors.  This is
> > needed so that we can do full resolves when transitioning to
> > PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear
> > values in all sorts of places wouldn't normally.
> >
> >  3) We now track compression state as a boolean separate from fast clear
> > type and this is tracked on a per-slice granularity.
> >
> > The previous scheme had some issues when it came to individual slices of
> > a multi-LOD images.  In particular, we only tracked "needs resolve"
> > per-LOD but you could do a vkCmdPipelineBarrier that would only resolve
> > a portion of the image and would set "needs resolve" to false anyway.
> > Also, any transition from an undefined layout would reset the clear
> > color for the entire LOD regardless of whether or not there was some
> > clear color on some other slice.
> >
> > As far as full/partial resolves go, he assumptions of the previous
> > scheme held because the one case where we do need a full resolve when
> > CCS_E is enabled is for window-system images.  Since we only ever
> > allowed X-tiled window-system images, CCS was entirely disabled on gen9+
> > and we never got CCS_E.  With the advent of Y-tiled window-system
> > buffers, we now need to properly support doing a full resolve of images
> > marked CCS_E.
> > ---
> >  src/intel/vulkan/anv_blorp.c   |   3 +-
> >  src/intel/vulkan/anv_image.c   |  96 ++-
> >  src/intel/vulkan/anv_private.h |  53 +++---
> >  src/intel/vulkan/genX_cmd_buffer.c | 340 +++---
> ---
> >  4 files changed, 331 insertions(+), 161 deletions(-)
> >
> > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> > index 3698543..594b0d8 100644
> > --- a/src/intel/vulkan/anv_blorp.c
> > +++ b/src/intel/vulkan/anv_blorp.c
> > @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> > * particular value and don't care about format or clear value.
> > */
> >const struct anv_address clear_color_addr =
> > - anv_image_get_clear_color_addr(cmd_buffer->device, image,
> > -aspect, level);
> > + anv_image_get_clear_color_addr(cmd_buffer->device, image,
> aspect);
> >surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
> > }
> >
> > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> > index 94b9ecb..d5f8dcf 100644
> > --- a/src/intel/vulkan/anv_image.c
> > +++ b/src/intel/vulkan/anv_image.c
> > @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct
> gen_device_info *devinfo,
> >   * fast-clear values in non-trivial cases (e.g., outside of a render
> pass in
> >   * which a fast clear has occurred).
> >   *
> > - * For the purpose of discoverability, the algorithm used to manage
> this buffer
> > - * is described here. A clear value in this buffer is updated when a
> fast clear
> > - * is performed on a subresource. One of two synchronization operations
> is
> > - * performed in order for a following memory access to use the
> fast-clear
> > - * value:
> > - *a. Copy the value from the buffer to the surface state object
> used for
> > - *   reading. This is done implicitly when the value is the clear
> value
> > - *   predetermined to be the default in other surface state
> objects. This
> > - *   is currently only done explicitly for the operation below.
> > - *b. Do (a) and use the surface state object to resolve the
> subresource.
> > - *   This is only done during layout transitions for decent
> performance.
> > + * In order to avoid having multiple clear colors for a single plane of
> an
> > + * image (hence a single RENDER_SURFACE_STATE), we only allow
> fast-clears on
> > + * the first slice (level 0, layer 0).  At the time of our testing (Jan
> 17,
> > + * 2018), there were known applications which would benefit from
> fast-clearing
> > + * more than just the first slice.
> >   *
> > - * With the above scheme, we can fast-clear whenever the hardware
> allows except
> > - * for two cases in which synchronization becomes impossible or
> undesirable:
> > - ** The subresource is in the GENERAL layout and is cleared to a
> value
> > - *  other than the special default value.
> > + * The fast clear portion of the image is laid out in the following
> order:
> >   *
> > - *  Performing a synchronization operation in order to read from the
> > - *  subresource 

Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking

2018-02-05 Thread Nanley Chery
On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote:
> This commit completely reworks aux tracking.  This includes a number of
> somewhat distinct changes:
> 
>  1) Since we are no longer fast-clearing multiple slices, we only need
> to track one fast clear color and one fast clear type.
> 
>  2) We store two bits for fast clear instead of one to let us
> distinguish between zero and non-zero fast clear colors.  This is
> needed so that we can do full resolves when transitioning to
> PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear
> values in all sorts of places wouldn't normally.
> 
>  3) We now track compression state as a boolean separate from fast clear
> type and this is tracked on a per-slice granularity.
> 
> The previous scheme had some issues when it came to individual slices of
> a multi-LOD images.  In particular, we only tracked "needs resolve"
> per-LOD but you could do a vkCmdPipelineBarrier that would only resolve
> a portion of the image and would set "needs resolve" to false anyway.
> Also, any transition from an undefined layout would reset the clear
> color for the entire LOD regardless of whether or not there was some
> clear color on some other slice.
> 
> As far as full/partial resolves go, he assumptions of the previous
> scheme held because the one case where we do need a full resolve when
> CCS_E is enabled is for window-system images.  Since we only ever
> allowed X-tiled window-system images, CCS was entirely disabled on gen9+
> and we never got CCS_E.  With the advent of Y-tiled window-system
> buffers, we now need to properly support doing a full resolve of images
> marked CCS_E.
> ---
>  src/intel/vulkan/anv_blorp.c   |   3 +-
>  src/intel/vulkan/anv_image.c   |  96 ++-
>  src/intel/vulkan/anv_private.h |  53 +++---
>  src/intel/vulkan/genX_cmd_buffer.c | 340 
> +++--
>  4 files changed, 331 insertions(+), 161 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 3698543..594b0d8 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> * particular value and don't care about format or clear value.
> */
>const struct anv_address clear_color_addr =
> - anv_image_get_clear_color_addr(cmd_buffer->device, image,
> -aspect, level);
> + anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
>surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
> }
>  
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 94b9ecb..d5f8dcf 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct 
> gen_device_info *devinfo,
>   * fast-clear values in non-trivial cases (e.g., outside of a render pass in
>   * which a fast clear has occurred).
>   *
> - * For the purpose of discoverability, the algorithm used to manage this 
> buffer
> - * is described here. A clear value in this buffer is updated when a fast 
> clear
> - * is performed on a subresource. One of two synchronization operations is
> - * performed in order for a following memory access to use the fast-clear
> - * value:
> - *a. Copy the value from the buffer to the surface state object used for
> - *   reading. This is done implicitly when the value is the clear value
> - *   predetermined to be the default in other surface state objects. This
> - *   is currently only done explicitly for the operation below.
> - *b. Do (a) and use the surface state object to resolve the subresource.
> - *   This is only done during layout transitions for decent performance.
> + * In order to avoid having multiple clear colors for a single plane of an
> + * image (hence a single RENDER_SURFACE_STATE), we only allow fast-clears on
> + * the first slice (level 0, layer 0).  At the time of our testing (Jan 17,
> + * 2018), there were known applications which would benefit from 
> fast-clearing
> + * more than just the first slice.
>   *
> - * With the above scheme, we can fast-clear whenever the hardware allows 
> except
> - * for two cases in which synchronization becomes impossible or undesirable:
> - ** The subresource is in the GENERAL layout and is cleared to a value
> - *  other than the special default value.
> + * The fast clear portion of the image is laid out in the following order:
>   *
> - *  Performing a synchronization operation in order to read from the
> - *  subresource is undesirable in this case. Firstly, b) is not an option
> - *  because a layout transition isn't required between a write and read 
> of
> - *  an image in the GENERAL layout. Secondly, it's undesirable to do a)
> - *  explicitly because it would 

Re: [Mesa-dev] [PATCH] Haiku: convert to autotools

2018-02-05 Thread kallisti5

On 2018-02-05 18:13, Dylan Baker wrote:

Quoting kallisti5 (2018-02-05 15:36:06)

On 2018-02-05 16:14, kallisti5 wrote:
> On 2018-02-05 15:39, Dylan Baker wrote:
>> Quoting kallisti5 (2018-02-05 12:58:30)
>>> On 2017-10-24 11:47, Emil Velikov wrote:
>>> > Hi Jerome,
>>> >
>>> > On 23 October 2017 at 16:58, Jerome Duval 
>>> > wrote:
>>> >> * configure.ac:
>>> >>   -pthread is not available on Haiku.
>>> >>   Haiku doesn't require --enable-dri
>>> >>   build hgl on Haiku
>>> >> * egl/Makefile.am: define backendfiles for Haiku
>>> >> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and
>>> >> targets/haiku-softpipe on Haiku.
>>> >> * src/gallium/targets/haiku-softpipe: add Makefile.am
>>> >> * src/gallium/state_trackers/hgl: add Makefile.am
>>> >> * winsys/sw/hgl: add Makefile.am
>>> >> * src/hgl/Makefile.am: add Makefile.am
>>> >> ---
>>> > Thanks for the patch. I think Eric has a point regarding splitting this
>>> > up.
>>> > Here is one way to handle it:
>>> >  - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku
>>> >  - 2 - src/egl
>>> >  - 3 - src/hgl
>>> >  - 4 misc fixes (the SoftwareRenderer.cpp hunk?)
>>> >  - 5 toggle - configure.ac + src/Makefile.am
>>>
>>> Hm, it looks like Jerome never got back to work on these changes...
>>> let
>>> me try to
>>> pick up the ball and run with it.
>>>
>>> > Couple of small suggestions:
>>> >  - keep all the sources and headers in the sources lists in
>>> > Makefile.sources
>>> >  - how do you guys manage pthreads - please mention that in the commit
>>> > message.
>>> >
>>> > If I'm reading this correctly, you strip out -pthread and there's no
>>> > pthread-stubs on Haiku.
>>>
>>> Haiku (and BeOS for that matter) has pthread support built into its
>>> core
>>> libroot.so.
>>>
>>> No need for -lpthread, all applications can assume its presence.
>>> Things
>>> that link -lpthread actually fail due to a non-existent libpthread...
>>> *however* as i'm typing this i'm being told we recently implemented a
>>> dummy static libpthread.a to try and appease assumptions about
>>> -lpthread
>>> existence so i'll remove the pthread checks :-)
>>>
>>>   -- Alex
>>
>> Hi Alex,
>>
>> I have a branch for building haiku with meson, when I was trying to
>> compile
>> neither the scons build nor the autotools build seemed to compile on a
>> Haiku VM
>> instance (x86_64), that was a few months ago though, so maybe it's
>> fixed.
>>
>> Our plan is to remove autotools from mesa, probably this year. I'm
>> thinking if
>> things look pretty good through the 18.0 release cycle I'll probably
>> propose
>> marking autotools as deprecated for 18.1 and propose removal in 18.2.
>
> Ah. crap.  I just got autoconfig working :-).  Historically I have only
> used
> SCons for our builds.  I always preferred the SCons build since
> autotools always
> ends up looking like spaghetti.  Here is what our current build does:
>
> 
https://github.com/haikuports/haikuports/blob/master/sys-libs/mesa/mesa-17.1.4.recipe#L52
>
> It looks like Jerome hacked in a patch for autotools... but i've heard
> some reports
> of instability with the resulting artifacts.
>
>> I'm not going to block you guys using autotools or NAK anything, I
>> just want
>> you to be aware that we're trying to consolidate down to just meson
>> and
>> android.mk files. I can respin the haiku patches and CC you if you're
>> interested in
>> looking at them.
>
> If Meson is the future, i'm definitely down helping (or even taking
> over) that branch
> if it is just incomplete Haiku work.
>
> I'm going to try and do better maintenance on Haiku Mesa in 2018. I've
> been only around
> minimally in 2017 and am a little out of date.
>
>> You might also want to see if you guys can update your meson, at least
>> last time
>> I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that
>> -pthread
>> and -lpthread are never added by meson.
>
> I just installed meson on Haiku and we are currently at 0.43.0

I took a quick crack at meson on Haiku. Getting stuck with a -pthread
getting injected
somewhere around glapi gen.

Let's see if my modifications are close to yours sight unseen :-)

https://gist.github.com/kallisti5/eb43162dd4c9e61b5740444d20955118

  -- Alex


Pretty close. I lied, apparently the pthreads fix is in 0.44

https://github.com/mesonbuild/meson/commit/fc547ad05e5a8e650ae5bc2ecc7d40e4dbcc9f0f

Here's my branch, but it needs rebase pretty bad, there's also a patch 
to use
shared glapi that I added trying to see if that would get the build 
working that

needs to be removed:

https://github.com/dcbaker/mesa/tree/wip/meson-haiku


I went ahead and made a recipe for 0.44.0. Our repos should have it 
soon.

I can confirm the pthread issue is solved via that change.

Do you mind if I take the work in your branch and try to rebase/complete 
it and upstream it?  If we can get Meson working, I think Haiku is one 
of the last SCons consumers... maybe everyone will be on-board dropping 

Re: [Mesa-dev] [PATCH] Haiku: convert to autotools

2018-02-05 Thread Dylan Baker
Quoting kallisti5 (2018-02-05 15:36:06)
> On 2018-02-05 16:14, kallisti5 wrote:
> > On 2018-02-05 15:39, Dylan Baker wrote:
> >> Quoting kallisti5 (2018-02-05 12:58:30)
> >>> On 2017-10-24 11:47, Emil Velikov wrote:
> >>> > Hi Jerome,
> >>> >
> >>> > On 23 October 2017 at 16:58, Jerome Duval 
> >>> > wrote:
> >>> >> * configure.ac:
> >>> >>   -pthread is not available on Haiku.
> >>> >>   Haiku doesn't require --enable-dri
> >>> >>   build hgl on Haiku
> >>> >> * egl/Makefile.am: define backendfiles for Haiku
> >>> >> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and
> >>> >> targets/haiku-softpipe on Haiku.
> >>> >> * src/gallium/targets/haiku-softpipe: add Makefile.am
> >>> >> * src/gallium/state_trackers/hgl: add Makefile.am
> >>> >> * winsys/sw/hgl: add Makefile.am
> >>> >> * src/hgl/Makefile.am: add Makefile.am
> >>> >> ---
> >>> > Thanks for the patch. I think Eric has a point regarding splitting this
> >>> > up.
> >>> > Here is one way to handle it:
> >>> >  - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku
> >>> >  - 2 - src/egl
> >>> >  - 3 - src/hgl
> >>> >  - 4 misc fixes (the SoftwareRenderer.cpp hunk?)
> >>> >  - 5 toggle - configure.ac + src/Makefile.am
> >>> 
> >>> Hm, it looks like Jerome never got back to work on these changes... 
> >>> let
> >>> me try to
> >>> pick up the ball and run with it.
> >>> 
> >>> > Couple of small suggestions:
> >>> >  - keep all the sources and headers in the sources lists in
> >>> > Makefile.sources
> >>> >  - how do you guys manage pthreads - please mention that in the commit
> >>> > message.
> >>> >
> >>> > If I'm reading this correctly, you strip out -pthread and there's no
> >>> > pthread-stubs on Haiku.
> >>> 
> >>> Haiku (and BeOS for that matter) has pthread support built into its 
> >>> core
> >>> libroot.so.
> >>> 
> >>> No need for -lpthread, all applications can assume its presence. 
> >>> Things
> >>> that link -lpthread actually fail due to a non-existant libpthread...
> >>> *however* as i'm typing this i'm being told we recently implemented a
> >>> dummy static libpthread.a to try and appease assumptions about 
> >>> -lpthread
> >>> existence so i'll remove the pthread checks :-)
> >>> 
> >>>   -- Alex
> >> 
> >> Hi Alex,
> >> 
> >> I have a branch for building haiku with meson, when I was trying to 
> >> compile
> >> neither the scons build nor the autotools build seemed to compile on a 
> >> Haiku VM
> >> instance (x86_64), that was a few months ago though, so maybe its 
> >> fixed.
> >> 
> >> Our plan is to remove autotools from mesa, probably this year. I'm 
> >> thinking if
> >> things look pretty good through the 18.0 release cycle I'll probably 
> >> propose
> >> marking autotools as deprecated for 18.1 and propose removal in 18.2.
> > 
> > Ah. crap.  I just got autoconfig working :-).  Historically I have only 
> > used
> > SCons for our builds.  I always preferred the SCons build since 
> > autotools always
> > ends up looking like spaghetti.  Here is what our current build does:
> > 
> > https://github.com/haikuports/haikuports/blob/master/sys-libs/mesa/mesa-17.1.4.recipe#L52
> > 
> > It looks like Jerome hacked in a patch for autotools... but i've heard
> > some reports
> > of instability with the resulting artifacts.
> > 
> >> I'm not going to block you guys using autotools or NAK anything, I 
> >> just want
> >> you to be aware that we're trying to consolidate down to just meson 
> >> and
> >> android.mk files. I can respin the haiku patches and CC you if you're 
> >> interested in
> >> looking at them.
> > 
> > If Meson is the future, i'm definitely down helping (or even taking
> > over) that branch
> > if it is just incomplete Haiku work.
> > 
> > I'm going to try and do better maintenance on Haiku Mesa in 2018. I've
> > been only around
> > minimally in 2017 am a little out of date.
> > 
> >> You might also want to see if you guys can update your meson, at least 
> >> last time
> >> I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that 
> >> -pthread
> >> and -lpthread are never added by meson.
> > 
> > I just installed meson on Haiku and we are currently at 0.43.0
> 
> I took a quick crack at meson on Haiku. Getting stuck with a -pthread 
> getting injected
> somewhere around glapi gen.
> 
> Lets see if my modifications are close to yours sight unseen :-)
> 
> https://gist.github.com/kallisti5/eb43162dd4c9e61b5740444d20955118
> 
>   -- Alex

Pretty close. I lied, apparently the pthreads fix is in 0.44

https://github.com/mesonbuild/meson/commit/fc547ad05e5a8e650ae5bc2ecc7d40e4dbcc9f0f

Here's my branch, but it needs rebase pretty bad, there's also a patch to use
shared glapi that I added trying to see if that would get the build working that
needs to be removed:

https://github.com/dcbaker/mesa/tree/wip/meson-haiku

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list

[Mesa-dev] [PATCH v2] radeonsi/nir: always set input_usage_mask as using all components

2018-02-05 Thread Timothy Arceri
This fixes a regression for now, in the future we should gather
the used components properly.

V2: just set for VS and correctly handle doubles

Fixes: be973ed21f6e "radeonsi: load the right number of components for VS 
inputs and TBOs"

Cc: Marek Olšák 
---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 8abffdb8fc..06d9354363 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -304,14 +304,22 @@ void si_nir_scan_shader(const struct nir_shader *nir,
unsigned attrib_count = glsl_count_attribute_slots(type,
   
nir->info.stage == MESA_SHADER_VERTEX);
 
+   i = variable->data.driver_location;
+
/* Vertex shader inputs don't have semantics. The state
 * tracker has already mapped them to attributes via
 * variable->data.driver_location.
 */
if (nir->info.stage == MESA_SHADER_VERTEX) {
-   if (glsl_type_is_dual_slot(variable->type))
+   /* TODO: gather the actual input useage and remove 
this. */
+   info->input_usage_mask[i] = TGSI_WRITEMASK_XYZW;
+
+   if (glsl_type_is_dual_slot(variable->type)) {
num_inputs += 2;
-   else
+
+   /* TODO: gather the actual input useage and 
remove this. */
+   info->input_usage_mask[i+1] = 
TGSI_WRITEMASK_XYZW;
+   } else
num_inputs++;
continue;
}
@@ -327,8 +335,6 @@ void si_nir_scan_shader(const struct nir_shader *nir,
continue;
}
 
-   i = variable->data.driver_location;
-
for (unsigned j = 0; j < attrib_count; j++, i++) {
 
if (processed_inputs & ((uint64_t)1 << i))
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/6] android: Move gralloc handle struct to libdrm

2018-02-05 Thread Rob Herring
On Mon, Jan 29, 2018 at 11:37 AM, Robert Foss  wrote:
> This struct is used in mesa and drm_hwcomposer.
> Versions of it have been implemented in several grallocs:
> drm_gralloc, gbm_gralloc, minigbm and intel-minigbm.
>
> Other than the 1:1 move of the struct a new generic name
> has been chosen and variables have had comments added to them.
>
> Signed-off-by: Robert Foss 
> ---
> Changes since v1:
>  Suggested by Rob Herring:
>  - Fixed copyright statement
>  - Moved FDs to be first in handle
>  - Initialize native_handle_t using native_handle_create()
>
>  Android.mk   |   8 +++-
>  Makefile.sources |   3 ++
>  android/gralloc_handle.h | 102 
> +++
>  3 files changed, 111 insertions(+), 2 deletions(-)
>  create mode 100644 android/gralloc_handle.h
>
> diff --git a/Android.mk b/Android.mk
> index 292be2360263..8611c5e316d8 100644
> --- a/Android.mk
> +++ b/Android.mk
> @@ -28,7 +28,7 @@ LIBDRM_TOP := $(LOCAL_PATH)
>
>  include $(CLEAR_VARS)
>
> -# Import variables LIBDRM_{,H_,INCLUDE_H_,INCLUDE_VMWGFX_H_}FILES
> +# Import variables 
> LIBDRM_{,H,INCLUDE_H,INCLUDE_ANDROID_H,INCLUDE_VMWGFX_H}_FILES
>  include $(LOCAL_PATH)/Makefile.sources
>
>  #static library for the device (recovery)
> @@ -38,7 +38,8 @@ LOCAL_MODULE := libdrm
>  LOCAL_SRC_FILES := $(LIBDRM_FILES)
>  LOCAL_EXPORT_C_INCLUDE_DIRS := \
> $(LOCAL_PATH) \
> -   $(LOCAL_PATH)/include/drm
> +   $(LOCAL_PATH)/include/drm \
> +   $(LOCAL_PATH)/android
>
>  LOCAL_C_INCLUDES := \
> $(LOCAL_PATH)/include/drm
> @@ -54,6 +55,9 @@ LOCAL_SRC_FILES := $(LIBDRM_FILES)
>  LOCAL_EXPORT_C_INCLUDE_DIRS := \
>  $(LOCAL_PATH)/include/drm
>
> +LOCAL_SHARED_LIBRARIES := \
> +   libcutils
> +
>  LOCAL_C_INCLUDES := \
>  $(LOCAL_PATH)/include/drm
>
> diff --git a/Makefile.sources b/Makefile.sources
> index 10aa1d0f4b6e..1f8372bca183 100644
> --- a/Makefile.sources
> +++ b/Makefile.sources
> @@ -37,5 +37,8 @@ LIBDRM_INCLUDE_H_FILES := \
> include/drm/via_drm.h \
> include/drm/virtgpu_drm.h
>
> +LIBDRM_INCLUDE_ANDROID_H_FILES := \
> +   android/gralloc_handle.h
> +
>  LIBDRM_INCLUDE_VMWGFX_H_FILES := \
> include/drm/vmwgfx_drm.h
> diff --git a/android/gralloc_handle.h b/android/gralloc_handle.h
> new file mode 100644
> index ..770ee7adb4b5
> --- /dev/null
> +++ b/android/gralloc_handle.h
> @@ -0,0 +1,102 @@
> +/*
> + * Copyright (C) 2018 Robert Foss 

Sorry, if I wasn't clear, but this obviously comes from
gralloc_drm_handle.h. You should maintain those copyrights (and make
sure we aren't changing the license).

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
> THE
> + * SOFTWARE.
> + *
> + * Authors:
> + *Robert Foss 
> + */
> +
> +#ifndef __ANDROID_GRALLOC_HANDLE_H__
> +#define __ANDROID_GRALLOC_HANDLE_H__
> +
> +#include 
> +
> +/* support users of drm_gralloc/gbm_gralloc */
> +#define gralloc_gbm_handle_t gralloc_handle_t
> +#define gralloc_drm_handle_t gralloc_handle_t
> +
> +struct gralloc_handle_t {
> +   native_handle_t base;
> +
> +   /* dma-buf file descriptor
> +* Must be located first since, native_handle_t is allocated
> +* using native_handle_create(), which allocates space for
> +* sizeof(native_handle_t) + sizeof(int) * (numFds + numInts)
> +* numFds = GRALLOC_HANDLE_NUM_FDS
> +* numInts = GRALLOC_HANDLE_NUM_INTS
> +* Where numFds represents the number of FDs and
> +* numInts represents the space needed for the
> +* remainder of this struct.
> +* And the FDs are expected to be found first following
> +* native_handle_t.
> +*/
> +   int prime_fd;
> +
> +   int magic; /* differentiate between allocator impls */

[Mesa-dev] [PATCH] i965: Do null pointer check before dereferencing vue_prog_data

2018-02-05 Thread Anuj Phogat
Signed-off-by: Anuj Phogat 
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index aa4d64d08e..67fb328dbc 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -3966,7 +3966,8 @@ genX(upload_ds_state)(struct brw_context *brw)
tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
 
 #if GEN_GEN >= 8
-if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
+if (vue_prog_data &&
+vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
 ds.UserClipDistanceCullTestEnableBitmask =
 vue_prog_data->cull_distance_mask;
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/isl: Add assertion for aux surface pitch

2018-02-05 Thread Anuj Phogat
I don't have a test case hitting this assert. But, it's nice to have
an assert checking the limit.

Signed-off-by: Anuj Phogat 
---
 src/intel/isl/isl_surface_state.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/intel/isl/isl_surface_state.c 
b/src/intel/isl/isl_surface_state.c
index bfb27fa4a4..afd4b80ddb 100644
--- a/src/intel/isl/isl_surface_state.c
+++ b/src/intel/isl/isl_surface_state.c
@@ -566,6 +566,8 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, 
void *state,
   s.AuxiliarySurfaceBaseAddress = info->aux_address;
   s.AuxiliarySurfacePitch = pitch_in_tiles - 1;
 
+  assert(s.AuxiliarySurfacePitch <= 511);
+
 #if GEN_GEN >= 8
   assert(GEN_GEN >= 9 || info->aux_usage != ISL_AUX_USAGE_CCS_E);
   /* Auxiliary surfaces in ISL have compressed formats but the hardware
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/3] Fix and tweak to the VAO v2

2018-02-05 Thread Brian Paul

On 02/05/2018 03:19 PM, mathias.froehl...@gmx.net wrote:

From: Mathias Fröhlich 

Hi Brian,

Actually after incorporating your review requests to set
gl_vertex_array::Size and gl_vertex_array::Ptr to zero, radeonsi
started to assert in Bitmap/CopyPixels/DrawPixels.
Which assertion, exactly?  And what test triggers it?  I'd like to take 
a close look with the llvmpipe/svga drivers just so I understand what's 
happening.


-Brian



So, here the updated series including the requested changes.
And additionally for review the change to fix the mentioned asserts
in several piglit tests.

Please review!

best

Mathias


Mathias Fröhlich (3):
   mesa: Fix VAO buffer object tracking.
   mesa: Mute arrays for Bitmap/CopyPixels/DrawPixels callbacks.
   mesa: Only update enabled VAO gl_vertex_array entries.

  src/mesa/main/drawpix.c | 10 ++
  src/mesa/main/varray.c  | 10 ++
  src/mesa/main/varray.h  | 29 ++---
  3 files changed, 34 insertions(+), 15 deletions(-)



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] util: remove redundant check for the __clang__ macro

2018-02-05 Thread Brian Paul


I'm tempted to say the other places which only check for __GNUC__ should 
also check for __clang__, just to be more obvious (or does everyone know 
that __clang__ implies __GNUC__?).  Maybe others have an opinion.


Anyway, the location in question below seems to be first place this 
appears in the file so I'd suggest putting a simple comment there, like 
/* Note: Clang also sets __GNUC__ (see other cases below) */


-Brian

On 02/05/2018 02:41 PM, Vlad Golovkin wrote:

In this file there are similar cases with macros PUBLIC, USED and
ATTRIBUTE_NOINLINE, before defining which as __attribute__(...), code
only checks for __GNUC__.
Should I add comments there as well?

2018-02-05 22:51 GMT+02:00 Brian Paul :

On 02/05/2018 01:44 PM, Vlad Golovkin wrote:


Clang defines __GNUC__ macro, so one doesn't need to check __clang__
macro in this particular case.



Perhaps mention that in a comment below so there's no confusion.

-Brian



---
   src/util/macros.h | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/util/macros.h b/src/util/macros.h
index 432d513930..d36ca095d5 100644
--- a/src/util/macros.h
+++ b/src/util/macros.h
@@ -138,7 +138,7 @@ do {   \
 /* Forced function inlining */
   #ifndef ALWAYS_INLINE
-#  if defined(__GNUC__) || defined(__clang__)
+#  if defined(__GNUC__)
   #define ALWAYS_INLINE inline __attribute__((always_inline))
   #  elif defined(_MSC_VER)
   #define ALWAYS_INLINE __forceinline





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Haiku: convert to autotools

2018-02-05 Thread Dylan Baker
Quoting kallisti5 (2018-02-05 14:14:42)
> On 2018-02-05 15:39, Dylan Baker wrote:
> > Quoting kallisti5 (2018-02-05 12:58:30)
> >> On 2017-10-24 11:47, Emil Velikov wrote:
> >> > Hi Jerome,
> >> >
> >> > On 23 October 2017 at 16:58, Jerome Duval 
> >> > wrote:
> >> >> * configure.ac:
> >> >>   -pthread is not available on Haiku.
> >> >>   Haiku doesn't require --enable-dri
> >> >>   build hgl on Haiku
> >> >> * egl/Makefile.am: define backendfiles for Haiku
> >> >> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and
> >> >> targets/haiku-softpipe on Haiku.
> >> >> * src/gallium/targets/haiku-softpipe: add Makefile.am
> >> >> * src/gallium/state_trackers/hgl: add Makefile.am
> >> >> * winsys/sw/hgl: add Makefile.am
> >> >> * src/hgl/Makefile.am: add Makefile.am
> >> >> ---
> >> > Thanks for the patch. I think Eric has a point regarding splitting this
> >> > up.
> >> > Here is one way to handle it:
> >> >  - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku
> >> >  - 2 - src/egl
> >> >  - 3 - src/hgl
> >> >  - 4 misc fixes (the SoftwareRenderer.cpp hunk?)
> >> >  - 5 toggle - configure.ac + src/Makefile.am
> >> 
> >> Hm, it looks like Jerome never got back to work on these changes... 
> >> let
> >> me try to
> >> pick up the ball and run with it.
> >> 
> >> > Couple of small suggestions:
> >> >  - keep all the sources and headers in the sources lists in
> >> > Makefile.sources
> >> >  - how do you guys manage pthreads - please mention that in the commit
> >> > message.
> >> >
> >> > If I'm reading this correctly, you strip out -pthread and there's no
> >> > pthread-stubs on Haiku.
> >> 
> >> Haiku (and BeOS for that matter) has pthread support built into its 
> >> core
> >> libroot.so.
> >> 
> >> No need for -lpthread, all applications can assume its presence. 
> >> Things
> >> that link -lpthread actually fail due to a nonexistent libpthread...
> >> *however* as i'm typing this i'm being told we recently implemented a
> >> dummy static libpthread.a to try and appease assumptions about 
> >> -lpthread
> >> existence so i'll remove the pthread checks :-)
> >> 
> >>   -- Alex
> > 
> > Hi Alex,
> > 
> > I have a branch for building haiku with meson, when I was trying to 
> > compile
> > neither the scons build nor the autotools build seemed to compile on a 
> > Haiku VM
> > instance (x86_64), that was a few months ago though, so maybe it's 
> > fixed.
> > 
> > Our plan is to remove autotools from mesa, probably this year. I'm 
> > thinking if
> > things look pretty good through the 18.0 release cycle I'll probably 
> > propose
> > marking autotools as deprecated for 18.1 and propose removal in 18.2.
> 
> Ah. crap.  I just got autoconfig working :-).  Historically I have only 
> used
> SCons for our builds.  I always preferred the SCons build since 
> autotools always
> ends up looking like spaghetti.  Here is what our current build does:

Sorry, I've meant to get on this a little faster, but the meson conversion has
been a lot more time consuming than I predicted it would be, lol.

> 
> https://github.com/haikuports/haikuports/blob/master/sys-libs/mesa/mesa-17.1.4.recipe#L52

Meson should make that a little simpler, since it has an install target, and we
should be able to make that work for you guys as well.

> 
> It looks like Jerome hacked in a patch for autotools... but i've heard 
> some reports
> of instability with the resulting artifacts.
> 
> > I'm not going to block you guys using autotools or NAK anything, I just 
> > want
> > you to be aware that we're trying to consolidate down to just meson and
> > android.mk files. I can respin the haiku patches and CC you if you're 
> > interested in
> > looking at them.
> 
> If Meson is the future, i'm definitely down helping (or even taking 
> over) that branch
> if it is just incomplete Haiku work.

I think it's pretty close to being ready for review. It's based on a branch to
fix static-glapi with meson that never landed, but I think I have that all
sorted now so I'm going to push that today, and that should make the haiku build
pretty simple (it's just one patch after that I think).

> 
> I'm going to try and do better maintenance on Haiku Mesa in 2018. I've 
> been only around
> minimally in 2017 and am a little out of date.
> 
> > You might also want to see if you guys can update your meson, at least 
> > last time
> > I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that 
> > -pthread
> > and -lpthread are never added by meson.
> 
> I just installed meson on Haiku and we are currently at 0.43.0
> 
Awesome, that makes things a lot easier with meson.

You can have a look at the build in general, I think meson is syntactically
pretty nice, it's like a minimal python or ruby, and the builds are much faster
(that's just because ninja is really smart).

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list

Re: [Mesa-dev] [PATCH] Haiku: convert to autotools

2018-02-05 Thread kallisti5

On 2018-02-05 16:14, kallisti5 wrote:

On 2018-02-05 15:39, Dylan Baker wrote:

Quoting kallisti5 (2018-02-05 12:58:30)

On 2017-10-24 11:47, Emil Velikov wrote:
> Hi Jerome,
>
> On 23 October 2017 at 16:58, Jerome Duval 
> wrote:
>> * configure.ac:
>>   -pthread is not available on Haiku.
>>   Haiku doesn't require --enable-dri
>>   build hgl on Haiku
>> * egl/Makefile.am: define backendfiles for Haiku
>> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and
>> targets/haiku-softpipe on Haiku.
>> * src/gallium/targets/haiku-softpipe: add Makefile.am
>> * src/gallium/state_trackers/hgl: add Makefile.am
>> * winsys/sw/hgl: add Makefile.am
>> * src/hgl/Makefile.am: add Makefile.am
>> ---
> Thanks for the patch. I think Eric has a point regarding splitting this
> up.
> Here is one way to handle it:
>  - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku
>  - 2 - src/egl
>  - 3 - src/hgl
>  - 4 misc fixes (the SoftwareRenderer.cpp hunk?)
>  - 5 toggle - configure.ac + src/Makefile.am

Hm, it looks like Jerome never got back to work on these changes... 
let

me try to
pick up the ball and run with it.

> Couple of small suggestions:
>  - keep all the sources and headers in the sources lists in
> Makefile.sources
>  - how do you guys manage pthreads - please mention that in the commit
> message.
>
> If I'm reading this correctly, you strip out -pthread and there's no
> pthread-stubs on Haiku.

Haiku (and BeOS for that matter) has pthread support built into its 
core

libroot.so.

No need for -lpthread, all applications can assume its presence. 
Things

that link -lpthread actually fail due to a nonexistent libpthread...
*however* as i'm typing this i'm being told we recently implemented a
dummy static libpthread.a to try and appease assumptions about 
-lpthread

existence so i'll remove the pthread checks :-)

  -- Alex


Hi Alex,

I have a branch for building haiku with meson, when I was trying to 
compile
neither the scons build nor the autotools build seemed to compile on a 
Haiku VM
instance (x86_64), that was a few months ago though, so maybe it's 
fixed.


Our plan is to remove autotools from mesa, probably this year. I'm 
thinking if
things look pretty good through the 18.0 release cycle I'll probably 
propose

marking autotools as deprecated for 18.1 and propose removal in 18.2.


Ah. crap.  I just got autoconfig working :-).  Historically I have only 
used
SCons for our builds.  I always preferred the SCons build since 
autotools always

ends up looking like spaghetti.  Here is what our current build does:

https://github.com/haikuports/haikuports/blob/master/sys-libs/mesa/mesa-17.1.4.recipe#L52

It looks like Jerome hacked in a patch for autotools... but i've heard
some reports
of instability with the resulting artifacts.

I'm not going to block you guys using autotools or NAK anything, I 
just want
you to be aware that we're trying to consolidate down to just meson 
and
android.mk files. I can respin the haiku patches and CC you if you're 
interested in

looking at them.


If Meson is the future, i'm definitely down helping (or even taking
over) that branch
if it is just incomplete Haiku work.

I'm going to try and do better maintenance on Haiku Mesa in 2018. I've
been only around
minimally in 2017 and am a little out of date.

You might also want to see if you guys can update your meson, at least 
last time
I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that 
-pthread

and -lpthread are never added by meson.


I just installed meson on Haiku and we are currently at 0.43.0


I took a quick crack at meson on Haiku. Getting stuck with a -pthread 
getting injected

somewhere around glapi gen.

Let's see if my modifications are close to yours sight unseen :-)

https://gist.github.com/kallisti5/eb43162dd4c9e61b5740444d20955118

 -- Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600/fp64: Fix build.

2018-02-05 Thread Dylan Baker
This is turning our CI red, so I'm going to go ahead and push this.

Quoting Vinson Lee (2018-02-05 15:24:45)
>   CC   r600_shader.lo
> r600_shader.c: In function ‘egcm_int_to_double’:
> r600_shader.c:4543:12: error: ‘ctx’ is a pointer; did you mean to use ‘->’?
>  if (ctx.bc->chip_class == CAYMAN)
> ^
> ->
> 
> Fixes: 35b430157776 ("r600/fp64: fix integer->double conversion")
> Signed-off-by: Vinson Lee 
> ---
>  src/gallium/drivers/r600/r600_shader.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index e3b832b04f77..4874d14a581d 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -4540,7 +4540,7 @@ static int egcm_int_to_double(struct r600_shader_ctx 
> *ctx)
> alu.dst.sel = temp_reg;
> alu.dst.chan = i;
> alu.dst.write = 1;
> -   if (ctx.bc->chip_class == CAYMAN)
> +   if (ctx->bc->chip_class == CAYMAN)
> alu.last = i == dchan + 1;
> else
> alu.last = 1; /* trans only ops on 
> evergreen */
> -- 
> 2.14.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600/fp64: Fix build.

2018-02-05 Thread Dylan Baker
I just wrote the same patch:
Reviewed-by: Dylan Baker 

Quoting Vinson Lee (2018-02-05 15:24:45)
>   CC   r600_shader.lo
> r600_shader.c: In function ‘egcm_int_to_double’:
> r600_shader.c:4543:12: error: ‘ctx’ is a pointer; did you mean to use ‘->’?
>  if (ctx.bc->chip_class == CAYMAN)
> ^
> ->
> 
> Fixes: 35b430157776 ("r600/fp64: fix integer->double conversion")
> Signed-off-by: Vinson Lee 
> ---
>  src/gallium/drivers/r600/r600_shader.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index e3b832b04f77..4874d14a581d 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -4540,7 +4540,7 @@ static int egcm_int_to_double(struct r600_shader_ctx 
> *ctx)
> alu.dst.sel = temp_reg;
> alu.dst.chan = i;
> alu.dst.write = 1;
> -   if (ctx.bc->chip_class == CAYMAN)
> +   if (ctx->bc->chip_class == CAYMAN)
> alu.last = i == dchan + 1;
> else
> alu.last = 1; /* trans only ops on 
> evergreen */
> -- 
> 2.14.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: lower fexp2(fmul(flog2(a), 2)) to fmul(a, a)

2018-02-05 Thread Ian Romanick
Do you have any data from shader-db for this change (and the other patch)?

On 02/05/2018 06:08 AM, Samuel Pitoiset wrote:
> Similar for the 4 case.
> 
> Suggested by Bas.
> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index 6dc19d9b12..b30d1df199 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -321,6 +321,8 @@ optimizations = [
> (('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), 
> '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
> (('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), 
> d))),
>  ('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 
> 2^(lg2(a) * b + lg2(c) * d) = a^b * c^d
> +   (('~fexp2', ('fmul', ('flog2', a), 2.0)), ('fmul', a, a)),
> +   (('~fexp2', ('fmul', ('flog2', a), 4.0)), ('fmul', ('fmul', a, a), 
> ('fmul', a, a))),
> (('~fpow', a, 1.0), a),
> (('~fpow', a, 2.0), ('fmul', a, a)),
> (('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600/fp64: Fix build.

2018-02-05 Thread Vinson Lee
  CC   r600_shader.lo
r600_shader.c: In function ‘egcm_int_to_double’:
r600_shader.c:4543:12: error: ‘ctx’ is a pointer; did you mean to use ‘->’?
 if (ctx.bc->chip_class == CAYMAN)
^
->

Fixes: 35b430157776 ("r600/fp64: fix integer->double conversion")
Signed-off-by: Vinson Lee 
---
 src/gallium/drivers/r600/r600_shader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index e3b832b04f77..4874d14a581d 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4540,7 +4540,7 @@ static int egcm_int_to_double(struct r600_shader_ctx *ctx)
alu.dst.sel = temp_reg;
alu.dst.chan = i;
alu.dst.write = 1;
-   if (ctx.bc->chip_class == CAYMAN)
+   if (ctx->bc->chip_class == CAYMAN)
alu.last = i == dchan + 1;
else
alu.last = 1; /* trans only ops on 
evergreen */
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/3] st/glsl_to_tgsi: move nir detection earlier - bisected

2018-02-05 Thread Timothy Arceri

On 05/02/18 15:04, Dieter Nützel wrote:

Am 02.02.2018 10:24, schrieb Timothy Arceri:

On 02/02/18 19:26, Dieter Nützel wrote:

Hello Tim,

_this_ version breaks UH, UV, mpv, blender 2.79 (some test files not 
all).

Must be something with the cache file(s).


The cache currently needs to be deleted when switching between nir and
tgsi. I'm not sure if I should try to avoid this or not ... I guess it
will probably save some bug reports so I'll try send a follow up
patch.


Hi Tim,

it is NOT your fault.
I tracked it down to Marek's commit 
be973ed21f6e456ebd753f26a99151d9ea6e765c


This should fix things for now:

https://patchwork.freedesktop.org/patch/202759/

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi/nir: always set input_usage_mask as using all components

2018-02-05 Thread Timothy Arceri
This fixes a regression for now, in the future we should gather
the used components properly.

Fixes: be973ed21f6e "radeonsi: load the right number of components for VS 
inputs and TBOs"

Cc: Marek Olšák 
---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 8abffdb8fc..d2ea09706d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -304,6 +304,11 @@ void si_nir_scan_shader(const struct nir_shader *nir,
unsigned attrib_count = glsl_count_attribute_slots(type,
   
nir->info.stage == MESA_SHADER_VERTEX);
 
+   i = variable->data.driver_location;
+
+   /* TODO: gather the actual input useage and remove this. */
+   info->input_usage_mask[i] = TGSI_WRITEMASK_XYZW;
+
/* Vertex shader inputs don't have semantics. The state
 * tracker has already mapped them to attributes via
 * variable->data.driver_location.
@@ -327,8 +332,6 @@ void si_nir_scan_shader(const struct nir_shader *nir,
continue;
}
 
-   i = variable->data.driver_location;
-
for (unsigned j = 0; j < attrib_count; j++, i++) {
 
if (processed_inputs & ((uint64_t)1 << i))
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] anv/image: Support CCS_E for images which may be used for storage

2018-02-05 Thread Jason Ekstrand
We have to do resolves whenever we go into the general layout for these
images.  However, it also means that images which declare the storage
usage but don't actually need it most of the time will still get
compression.
---
 src/intel/vulkan/anv_image.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 011e952..38f1c47 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -459,8 +459,7 @@ make_surface(const struct anv_device *dev,
  * a render target.  This means that it's safe to just leave
  * compression on at all times for these formats.
  */
-if (!(vk_info->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
-all_formats_ccs_e_compatible(>info, vk_info)) {
+if (all_formats_ccs_e_compatible(>info, vk_info)) {
image->planes[plane].aux_usage = ISL_AUX_USAGE_CCS_E;
 }
  }
@@ -795,9 +794,22 @@ anv_layout_to_aux_usage(const struct gen_device_info * 
const devinfo,
   return ISL_AUX_USAGE_NONE;
 
 
+   case VK_IMAGE_LAYOUT_GENERAL:
+  if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ return ISL_AUX_USAGE_NONE;
+  } else if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+ /* If we might be used as a storage image and we're in the general
+  * layout, we have to disable aux because the dataport doesn't
+  * support CCS.
+  */
+ return ISL_AUX_USAGE_NONE;
+  } else {
+ return image->planes[plane].aux_usage;
+  }
+
+
/* Transfer Layouts
 */
-   case VK_IMAGE_LAYOUT_GENERAL:
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
   if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] anv/cmd_buffer: Simplify transition_depth_buffer

2018-02-05 Thread Jason Ekstrand
If we don't have HiZ, then anv_layout_to_aux_usage will return NONE for
both layouts.  If the two layouts are the same, they will get the same aux
usage.  In either case, the code below will give us ISL_AUX_OP_NONE and
we'll return without doing anything.
---
 src/intel/vulkan/genX_cmd_buffer.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 819bd36..a7950cf 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -419,18 +419,6 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
 VkImageLayout initial_layout,
 VkImageLayout final_layout)
 {
-   assert(image);
-
-   /* A transition is a no-op if HiZ is not enabled, or if the initial and
-* final layouts are equal.
-*
-* The undefined layout indicates that the user doesn't care about the data
-* that's currently in the buffer. Therefore, a data-preserving resolve
-* operation is not needed.
-*/
-   if (image->planes[0].aux_usage != ISL_AUX_USAGE_HIZ || initial_layout == 
final_layout)
-  return;
-
const bool hiz_enabled = ISL_AUX_USAGE_HIZ ==
   anv_layout_to_aux_usage(_buffer->device->info, image,
   VK_IMAGE_ASPECT_DEPTH_BIT, initial_layout);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] anv/cmd_buffer: Use layout_to_* helpers in compute_aux_usage

2018-02-05 Thread Jason Ekstrand
---
 src/intel/vulkan/genX_cmd_buffer.c | 53 +-
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index a7950cf..056528f 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -241,16 +241,27 @@ color_attachment_compute_aux_usage(struct anv_device * 
device,
   att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
   att_state->fast_clear = false;
   return;
-   } else if (iview->image->planes[0].aux_usage == ISL_AUX_USAGE_MCS) {
-  att_state->aux_usage = ISL_AUX_USAGE_MCS;
+   }
+
+   att_state->aux_usage =
+  anv_layout_to_aux_usage(>info, iview->image,
+  VK_IMAGE_ASPECT_COLOR_BIT,
+  VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
+
+   /* If we don't have aux, then we should have returned early in the layer
+* check above.  If we got here, we must have something.
+*/
+   assert(att_state->aux_usage != ISL_AUX_USAGE_NONE);
+
+   if (att_state->aux_usage == ISL_AUX_USAGE_MCS) {
   att_state->input_aux_usage = ISL_AUX_USAGE_MCS;
   att_state->fast_clear = false;
   return;
-   } else if (iview->image->planes[0].aux_usage == ISL_AUX_USAGE_CCS_E) {
-  att_state->aux_usage = ISL_AUX_USAGE_CCS_E;
+   }
+
+   if (att_state->aux_usage == ISL_AUX_USAGE_CCS_E) {
   att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E;
} else {
-  att_state->aux_usage = ISL_AUX_USAGE_CCS_D;
   /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
*
*"If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
@@ -292,8 +303,25 @@ color_attachment_compute_aux_usage(struct anv_device * 
device,
   att_state->clear_value.color.uint32[3] == 0;
 
if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
-  /* Start off assuming fast clears are possible */
-  att_state->fast_clear = true;
+  /* Start by getting the fast clear type.  We use the first subpass
+   * layout here because we don't want to fast-clear if the first subpass
+   * to use the attachment can't handle fast-clears.
+   */
+  enum anv_fast_clear_type fast_clear_type =
+ anv_layout_to_fast_clear_type(>info, iview->image,
+   VK_IMAGE_ASPECT_COLOR_BIT,
+   
cmd_state->pass->attachments[att].first_subpass_layout);
+  switch (fast_clear_type) {
+  case ANV_FAST_CLEAR_NONE:
+ att_state->fast_clear = false;
+ break;
+  case ANV_FAST_CLEAR_DEFAULT_VALUE:
+ att_state->fast_clear = att_state->clear_color_is_zero;
+ break;
+  case ANV_FAST_CLEAR_ANY:
+ att_state->fast_clear = true;
+ break;
+  }
 
   /* Potentially, we could do partial fast-clears but doing so has crazy
* alignment restrictions.  It's easier to just restrict to full size
@@ -309,17 +337,6 @@ color_attachment_compute_aux_usage(struct anv_device * 
device,
   if (GEN_GEN <= 8 && !att_state->clear_color_is_zero_one)
  att_state->fast_clear = false;
 
-  /* We only allow fast clears in the GENERAL layout if the auxiliary
-   * buffer is always enabled and the fast-clear value is all 0's. See
-   * add_fast_clear_state_buffer() for more information.
-   */
-  if (cmd_state->pass->attachments[att].first_subpass_layout ==
-  VK_IMAGE_LAYOUT_GENERAL &&
-  (!att_state->clear_color_is_zero ||
-   iview->image->planes[0].aux_usage == ISL_AUX_USAGE_NONE)) {
- att_state->fast_clear = false;
-  }
-
   /* We only allow fast clears to the first slice of an image (level 0,
* layer 0) and only for the entire slice.  This guarantees us that, at
* any given time, there is only one clear color on any given image at
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] anv/blorp: Use layout_to_aux_usage when a layout is provided

2018-02-05 Thread Jason Ekstrand
Instead of having aux usage and ANV_AUX_USAGE_DEFAULT to mean "give me
something reasonable" we now use anv_layout_to_aux_usage whenever a
layout is available.  If a layout is available, we ignore the aux_usage
parameter.  For the cases where we have an explicit aux usage such as
clears and aux ops, we have a new ANV_IMAGE_LAYOUT_EXPLICIT_AUX layout.
---
 src/intel/vulkan/anv_blorp.c | 71 
 1 file changed, 46 insertions(+), 25 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 4018476..1cef587 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -173,7 +173,10 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device,
assert(ok);
 }
 
-#define ANV_AUX_USAGE_DEFAULT ((enum isl_aux_usage)0xff)
+/* Pick something high enough that it won't be used in core and low enough it
+ * will never map to an extension.
+ */
+#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)1000
 
 static struct blorp_address
 anv_to_blorp_address(struct anv_address addr)
@@ -188,18 +191,14 @@ static void
 get_blorp_surf_for_anv_image(const struct anv_device *device,
  const struct anv_image *image,
  VkImageAspectFlags aspect,
+ VkImageLayout layout,
  enum isl_aux_usage aux_usage,
  struct blorp_surf *blorp_surf)
 {
uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
 
-   if (aux_usage == ANV_AUX_USAGE_DEFAULT) {
-  aux_usage = image->planes[plane].aux_usage;
-
-  /* Blorp copies and blits can't handle HiZ so disable it by default */
-  if (aux_usage == ISL_AUX_USAGE_HIZ)
- aux_usage = ISL_AUX_USAGE_NONE;
-   }
+   if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX)
+  aux_usage = anv_layout_to_aux_usage(>info, image, aspect, 
layout);
 
const struct anv_surface *surface = >planes[plane].surface;
*blorp_surf = (struct blorp_surf) {
@@ -279,10 +278,12 @@ void anv_CmdCopyImage(
 struct blorp_surf src_surf, dst_surf;
 get_blorp_surf_for_anv_image(cmd_buffer->device,
  src_image, 1UL << aspect_bit,
- ANV_AUX_USAGE_DEFAULT, _surf);
+ srcImageLayout, ISL_AUX_USAGE_NONE,
+ _surf);
 get_blorp_surf_for_anv_image(cmd_buffer->device,
  dst_image, 1UL << aspect_bit,
- ANV_AUX_USAGE_DEFAULT, _surf);
+ dstImageLayout, ISL_AUX_USAGE_NONE,
+ _surf);
 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
   1UL << aspect_bit,
   dst_surf.aux_usage, dst_level,
@@ -299,9 +300,11 @@ void anv_CmdCopyImage(
   } else {
  struct blorp_surf src_surf, dst_surf;
  get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask,
-  ANV_AUX_USAGE_DEFAULT, _surf);
+  srcImageLayout, ISL_AUX_USAGE_NONE,
+  _surf);
  get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask,
-  ANV_AUX_USAGE_DEFAULT, _surf);
+  dstImageLayout, ISL_AUX_USAGE_NONE,
+  _surf);
  anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
dst_surf.aux_usage, dst_level,
dst_base_layer, layer_count);
@@ -323,6 +326,7 @@ static void
 copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
  struct anv_buffer *anv_buffer,
  struct anv_image *anv_image,
+ VkImageLayout image_layout,
  uint32_t regionCount,
  const VkBufferImageCopy* pRegions,
  bool buffer_to_image)
@@ -351,7 +355,8 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
   const VkImageAspectFlags aspect = 
pRegions[r].imageSubresource.aspectMask;
 
   get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect,
-   ANV_AUX_USAGE_DEFAULT, );
+   image_layout, ISL_AUX_USAGE_NONE,
+   );
   image.offset =
  anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset);
   image.level = pRegions[r].imageSubresource.mipLevel;
@@ -426,7 +431,7 @@ void anv_CmdCopyBufferToImage(
ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
ANV_FROM_HANDLE(anv_image, 

[Mesa-dev] [PATCH 0/5] anv: Support CCS_E for images which may be used for storage

2018-02-05 Thread Jason Ekstrand
This little series adds support for enabling CCS_E for images which may
have VK_IMAGE_USAGE_STORAGE_BIT set.  Previously, we just bailed on these
images and disabled CCS.  However, so long as we do a full resolve when
entering VK_IMAGE_LAYOUT_GENERAL, we can support CCS_E for all the other
layouts just fine.

The primary motivation of this series is actually to get better
test coverage of our resolve code.  By doing resolves when going into
VK_IMAGE_LAYOUT_GENERAL, the full resolve paths now get tested on many more
image types.  I have no idea what the perf impact of this will be.

The first 4 patches just make us use layout_to_* more often.

Jason Ekstrand (5):
  anv/cmd_buffer: Simplify transition_depth_buffer
  anv/cmd_buffer: Use layout_to_* helpers in compute_aux_usage
  anv/cmd_buffer: Delete some assert-only variables
  anv/blorp: Use layout_to_aux_usage when a layout is provided
  anv/image: Support CCS_E for images which may be used for storage

 src/intel/vulkan/anv_blorp.c   | 71 --
 src/intel/vulkan/anv_image.c   | 18 --
 src/intel/vulkan/genX_cmd_buffer.c | 70 +++--
 3 files changed, 97 insertions(+), 62 deletions(-)

-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] anv/cmd_buffer: Delete some assert-only variables

2018-02-05 Thread Jason Ekstrand
Checking the sample count is almost as good as aux usage in this case.
---
 src/intel/vulkan/genX_cmd_buffer.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 056528f..afe577c 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -732,9 +732,6 @@ init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer,
set_image_fast_clear_state(cmd_buffer, image, aspect,
   ANV_FAST_CLEAR_NONE);
 
-   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
-   enum isl_aux_usage aux_usage = image->planes[plane].aux_usage;
-
/* The fast clear value dword(s) will be copied into a surface state object.
 * Ensure that the restrictions of the fields in the dword(s) are followed.
 *
@@ -755,7 +752,7 @@ init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer,
 
  if (GEN_GEN >= 9) {
 /* MCS buffers on SKL+ can only have 1/0 clear colors. */
-assert(aux_usage == ISL_AUX_USAGE_MCS);
+assert(image->samples > 1);
 sdi.ImmediateData = 0;
  } else if (GEN_VERSIONx10 >= 75) {
 /* Pre-SKL, the dword containing the clear values also contains
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/14] anv/cmd_buffer: Avoid unnecessary transitions before fast clears

2018-02-05 Thread Jason Ekstrand
Previously, we would always apply the layout transition at the beginning
of the subpass and then do the clear whether fast or slow.  This meant
that there were some cases, specifically when the initial layout is
VK_IMAGE_LAYOUT_UNDEFINED, where we would end up doing a fast-clear or
ambiguate followed immediately by a fast-clear.  This probably isn't
terribly expensive, but it is a waste that we can avoid easily enough
now that we're doing everything at the same time in begin_subpass.
---
 src/intel/vulkan/genX_cmd_buffer.c | 57 ++
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 2732ef3..819bd36 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3326,39 +3326,25 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer 
*cmd_buffer,
  target_layout = subpass->attachments[i].layout;
   }
 
-  if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
- assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
- transition_color_buffer(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
- iview->planes[0].isl.base_level, 1,
- iview->planes[0].isl.base_array_layer,
- iview->planes[0].isl.array_len,
- att_state->current_layout, target_layout);
-  } else if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
- transition_depth_buffer(cmd_buffer, image,
- att_state->current_layout, target_layout);
- att_state->aux_usage =
-anv_layout_to_aux_usage(_buffer->device->info, image,
-VK_IMAGE_ASPECT_DEPTH_BIT, target_layout);
-  }
-  att_state->current_layout = target_layout;
+  uint32_t base_layer = iview->planes[0].isl.base_array_layer;
+  uint32_t layer_count = fb->layers;
 
-  if (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
- assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
-
- /* Multi-planar images are not supported as attachments */
+  if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
  assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(image->n_planes == 1);
-
- uint32_t base_layer = iview->planes[0].isl.base_array_layer;
- uint32_t layer_count = fb->layers;
 
- if (att_state->fast_clear) {
+ if ((att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) &&
+ att_state->fast_clear) {
 /* We only support fast-clears on the first layer */
 assert(iview->planes[0].isl.base_level == 0);
 assert(iview->planes[0].isl.base_array_layer == 0);
 
 anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
  0, 0, 1, ISL_AUX_OP_FAST_CLEAR, false);
+
+/* Performing a fast clear takes care of all our transition needs
+ * for the first slice.  Increment the base layer and decrement the
+ * layer count so that later transitions and clears don't touch layer 0.
+ */
 base_layer++;
 layer_count--;
 
@@ -3383,6 +3369,29 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer 
*cmd_buffer,
  }
 
  if (layer_count > 0) {
+transition_color_buffer(cmd_buffer, image,
+VK_IMAGE_ASPECT_COLOR_BIT,
+iview->planes[0].isl.base_level, 1,
+base_layer, layer_count,
+att_state->current_layout, target_layout);
+ }
+  } else if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ transition_depth_buffer(cmd_buffer, image,
+ att_state->current_layout, target_layout);
+ att_state->aux_usage =
+anv_layout_to_aux_usage(_buffer->device->info, image,
+VK_IMAGE_ASPECT_DEPTH_BIT, target_layout);
+  }
+  att_state->current_layout = target_layout;
+
+  if (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
+ assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+
+ /* Multi-planar images are not supported as attachments */
+ assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(image->n_planes == 1);
+
+ if (layer_count > 0) {
 assert(image->n_planes == 1);
 anv_image_clear_color(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
   att_state->aux_usage,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/14] anv/cmd_buffer: Do subpass image transitions in begin/end_subpass

2018-02-05 Thread Jason Ekstrand
---
 src/intel/vulkan/genX_cmd_buffer.c | 190 +
 1 file changed, 68 insertions(+), 122 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 2d17c28..2732ef3 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3257,120 +3257,6 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
*cmd_buffer)
cmd_buffer->state.hiz_enabled = info.hiz_usage == ISL_AUX_USAGE_HIZ;
 }
 
-
-/**
- * @brief Perform any layout transitions required at the beginning and/or end
- *of the current subpass for depth buffers.
- *
- * TODO: Consider preprocessing the attachment reference array at render pass
- *   create time to determine if no layout transition is needed at the
- *   beginning and/or end of each subpass.
- *
- * @param cmd_buffer The command buffer the transition is happening within.
- * @param subpass_end If true, marks that the transition is happening at the
- *end of the subpass.
- */
-static void
-cmd_buffer_subpass_transition_layouts(struct anv_cmd_buffer * const cmd_buffer,
-  const bool subpass_end)
-{
-   /* We need a non-NULL command buffer. */
-   assert(cmd_buffer);
-
-   const struct anv_cmd_state * const cmd_state = _buffer->state;
-   const struct anv_subpass * const subpass = cmd_state->subpass;
-
-   /* This function must be called within a subpass. */
-   assert(subpass);
-
-   /* If there are attachment references, the array shouldn't be NULL.
-*/
-   if (subpass->attachment_count > 0)
-  assert(subpass->attachments);
-
-   /* Iterate over the array of attachment references. */
-   for (const struct anv_subpass_attachment *att_ref = subpass->attachments;
-att_ref < subpass->attachments + subpass->attachment_count; att_ref++) 
{
-
-  /* If the attachment is unused, we can't perform a layout transition. */
-  if (att_ref->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
-  /* This attachment index shouldn't go out of bounds. */
-  assert(att_ref->attachment < cmd_state->pass->attachment_count);
-
-  const struct anv_render_pass_attachment * const att_desc =
- _state->pass->attachments[att_ref->attachment];
-  struct anv_attachment_state * const att_state =
- _buffer->state.attachments[att_ref->attachment];
-
-  /* The attachment should not be used in a subpass after its last. */
-  assert(att_desc->last_subpass_idx >= anv_get_subpass_id(cmd_state));
-
-  if (subpass_end && anv_get_subpass_id(cmd_state) <
-  att_desc->last_subpass_idx) {
- /* We're calling this function on a buffer twice in one subpass and
-  * this is not the last use of the buffer. The layout should not have
-  * changed from the first call and no transition is necessary.
-  */
- assert(att_state->current_layout == att_ref->layout ||
-att_state->current_layout ==
-VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
- continue;
-  }
-
-  /* The attachment index must be less than the number of attachments
-   * within the framebuffer.
-   */
-  assert(att_ref->attachment < cmd_state->framebuffer->attachment_count);
-
-  const struct anv_image_view * const iview =
- cmd_state->framebuffer->attachments[att_ref->attachment];
-  const struct anv_image * const image = iview->image;
-
-  /* Get the appropriate target layout for this attachment. */
-  VkImageLayout target_layout;
-
-  /* A resolve is necessary before use as an input attachment if the clear
-   * color or auxiliary buffer usage isn't supported by the sampler.
-   */
-  const bool input_needs_resolve =
-(att_state->fast_clear && !att_state->clear_color_is_zero_one) ||
-att_state->input_aux_usage != att_state->aux_usage;
-  if (subpass_end) {
- target_layout = att_desc->final_layout;
-  } else if (iview->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV &&
- !input_needs_resolve) {
- /* Layout transitions before the final only help to enable sampling as
-  * an input attachment. If the input attachment supports sampling
-  * using the auxiliary surface, we can skip such transitions by making
-  * the target layout one that is CCS-aware.
-  */
- target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-  } else {
- target_layout = att_ref->layout;
-  }
-
-  /* Perform the layout transition. */
-  if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
- transition_depth_buffer(cmd_buffer, image,
- att_state->current_layout, target_layout);
- att_state->aux_usage =
-anv_layout_to_aux_usage(_buffer->device->info, image,
-VK_IMAGE_ASPECT_DEPTH_BIT, 

[Mesa-dev] [PATCH 01/14] anv/cmd_buffer: Apply subpass flushes before set_subpass

2018-02-05 Thread Jason Ekstrand
This seems slightly more correct because it means that the flushes
happen before any clears or resolves implied by the subpass transition.
---
 src/intel/vulkan/genX_cmd_buffer.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 7c86d70..c234300 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3502,10 +3502,10 @@ void genX(CmdBeginRenderPass)(
 
genX(flush_pipeline_select_3d)(cmd_buffer);
 
-   genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses);
-
cmd_buffer->state.pending_pipe_bits |=
   cmd_buffer->state.pass->subpass_flushes[0];
+
+   genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses);
 }
 
 void genX(CmdNextSubpass)(
@@ -3525,11 +3525,11 @@ void genX(CmdNextSubpass)(
 */
cmd_buffer_subpass_transition_layouts(cmd_buffer, true);
 
-   genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1);
-
uint32_t subpass_id = anv_get_subpass_id(_buffer->state);
cmd_buffer->state.pending_pipe_bits |=
   cmd_buffer->state.pass->subpass_flushes[subpass_id];
+
+   genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1);
 }
 
 void genX(CmdEndRenderPass)(
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/14] anv/cmd_buffer: Move the color portion of clear_subpass into begin_subpass

2018-02-05 Thread Jason Ekstrand
This doesn't really change much now but it will give us more/better
control over clears in the future.  The one interesting functional
change here is that we are now re-emitting 3DSTATE_DEPTH_BUFFERS and
friends for each clear.  However, this only happens at begin_subpass
time so it shouldn't be substantially more expensive.
---
 src/intel/vulkan/anv_blorp.c   | 124 ++---
 src/intel/vulkan/anv_private.h |   8 +++
 src/intel/vulkan/genX_cmd_buffer.c |  54 +++-
 3 files changed, 94 insertions(+), 92 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index d38b343..fd32227 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1142,17 +1142,6 @@ subpass_needs_clear(const struct anv_cmd_buffer 
*cmd_buffer)
const struct anv_cmd_state *cmd_state = _buffer->state;
uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;
 
-   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
-  uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
-  if (a == VK_ATTACHMENT_UNUSED)
- continue;
-
-  assert(a < cmd_state->pass->attachment_count);
-  if (cmd_state->attachments[a].pending_clear_aspects) {
- return true;
-  }
-   }
-
if (ds != VK_ATTACHMENT_UNUSED) {
   assert(ds < cmd_state->pass->attachment_count);
   if (cmd_state->attachments[ds].pending_clear_aspects)
@@ -1186,86 +1175,6 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
*cmd_buffer)
};
 
struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
-   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
-  const uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
-  if (a == VK_ATTACHMENT_UNUSED)
- continue;
-
-  assert(a < cmd_state->pass->attachment_count);
-  struct anv_attachment_state *att_state = _state->attachments[a];
-
-  if (!att_state->pending_clear_aspects)
- continue;
-
-  assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
-
-  struct anv_image_view *iview = fb->attachments[a];
-  const struct anv_image *image = iview->image;
-  struct blorp_surf surf;
-  get_blorp_surf_for_anv_image(cmd_buffer->device,
-   image, VK_IMAGE_ASPECT_COLOR_BIT,
-   att_state->aux_usage, );
-
-  uint32_t base_layer = iview->planes[0].isl.base_array_layer;
-  uint32_t layer_count = fb->layers;
-
-  if (att_state->fast_clear) {
- surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
-
- /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
-  *
-  *"After Render target fast clear, pipe-control with color cache
-  *write-flush must be issued before sending any DRAW commands on
-  *that render target."
-  *
-  * This comment is a bit cryptic and doesn't really tell you what's
-  * going or what's really needed.  It appears that fast clear ops are
-  * not properly synchronized with other drawing.  This means that we
-  * cannot have a fast clear operation in the pipe at the same time as
-  * other regular drawing operations.  We need to use a PIPE_CONTROL
-  * to ensure that the contents of the previous draw hit the render
-  * target before we resolve and then use a second PIPE_CONTROL after
-  * the resolve to ensure that it is completed before any additional
-  * drawing occurs.
-  */
- cmd_buffer->state.pending_pipe_bits |=
-ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
-
- /* We only support fast-clears on the first layer */
- assert(iview->planes[0].isl.base_level == 0);
- assert(iview->planes[0].isl.base_array_layer == 0);
-
- assert(image->n_planes == 1);
- blorp_fast_clear(, , iview->planes[0].isl.format, 0, 0, 1,
-  render_area.offset.x, render_area.offset.y,
-  render_area.offset.x + render_area.extent.width,
-  render_area.offset.y + render_area.extent.height);
- base_layer++;
- layer_count--;
-
- cmd_buffer->state.pending_pipe_bits |=
-ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
-  }
-
-  if (layer_count > 0) {
- assert(image->n_planes == 1);
- anv_cmd_buffer_mark_image_written(cmd_buffer, image,
-   VK_IMAGE_ASPECT_COLOR_BIT,
-   att_state->aux_usage,
-   iview->planes[0].isl.base_level,
-   base_layer, layer_count);
-
- blorp_clear(, , iview->planes[0].isl.format,
- 

[Mesa-dev] [PATCH 07/14] anv/cmd_buffer: Decide whether or not to HiZ clear up-front

2018-02-05 Thread Jason Ekstrand
This moves the decision out of begin_subpass and into BeginRenderPass
like the decision for color clears.  We use a similar name for the
function for depth/stencil as for color even though no aux usage is
really getting computed.
---
 src/intel/vulkan/genX_cmd_buffer.c | 84 +++---
 1 file changed, 50 insertions(+), 34 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 21fdc6b..ab79fbf 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -350,6 +350,52 @@ color_attachment_compute_aux_usage(struct anv_device * 
device,
}
 }
 
+static void
+depth_stencil_attachment_compute_aux_usage(struct anv_device *device,
+   struct anv_cmd_state *cmd_state,
+   uint32_t att, VkRect2D render_area)
+{
+   struct anv_attachment_state *att_state = _state->attachments[att];
+   struct anv_image_view *iview = cmd_state->framebuffer->attachments[att];
+
+   /* These will be initialized after the first subpass transition. */
+   att_state->aux_usage = ISL_AUX_USAGE_NONE;
+   att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+
+   if (att_state->aux_usage != ISL_AUX_USAGE_HIZ) {
+  att_state->fast_clear = false;
+  return;
+   } else if (!(att_state->pending_clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) 
{
+  /* If we're just clearing stencil, we can always HiZ clear */
+  att_state->fast_clear = true;
+  return;
+   }
+
+   if (!blorp_can_hiz_clear_depth(GEN_GEN,
+  iview->planes[0].isl.format,
+  iview->image->samples,
+  render_area.offset.x,
+  render_area.offset.y,
+  render_area.offset.x +
+  render_area.extent.width,
+  render_area.offset.y +
+  render_area.extent.height)) {
+  att_state->fast_clear = false;
+   } else if (att_state->clear_value.depthStencil.depth != ANV_HZ_FC_VAL) {
+  att_state->fast_clear = false;
+   } else if (GEN_GEN == 8 &&
+  anv_can_sample_with_hiz(>info, iview->image)) {
+  /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
+   * fast-cleared portion of a HiZ buffer. Testing has revealed that Gen8
+   * only supports returning 0.0f. Gens prior to gen8 do not support this
+   * feature at all.
+   */
+  att_state->fast_clear = false;
+   } else {
+  att_state->fast_clear = true;
+   }
+}
+
 static bool
 need_input_attachment_state(const struct anv_render_pass_attachment *att)
 {
@@ -1125,12 +1171,9 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer 
*cmd_buffer,
 add_image_view_relocs(cmd_buffer, iview, 0,
   state->attachments[i].color);
  } else {
-/* This field will be initialized after the first subpass
- * transition.
- */
-state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
-
-state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE;
+depth_stencil_attachment_compute_aux_usage(cmd_buffer->device,
+   state, i,
+   begin->renderArea);
  }
 
  if (need_input_attachment_state(>attachments[i])) {
@@ -3541,34 +3584,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer 
*cmd_buffer,
VK_IMAGE_ASPECT_STENCIL_BIT));
 
   if (att_state->pending_clear_aspects) {
- bool clear_with_hiz = att_state->aux_usage == ISL_AUX_USAGE_HIZ;
- if (clear_with_hiz &&
- (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
-if (!blorp_can_hiz_clear_depth(GEN_GEN,
-   iview->planes[0].isl.format,
-   iview->image->samples,
-   render_area.offset.x,
-   render_area.offset.y,
-   render_area.offset.x +
-   render_area.extent.width,
-   render_area.offset.y +
-   render_area.extent.height)) {
-   clear_with_hiz = false;
-} else if (att_state->clear_value.depthStencil.depth != 
ANV_HZ_FC_VAL) {
-   clear_with_hiz = false;
-} else if (GEN_GEN == 8 &&
-   anv_can_sample_with_hiz(_buffer->device->info,
-   iview->image)) {
-   /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
-* fast-cleared 

[Mesa-dev] [PATCH 02/14] anv/cmd_buffer: Add begin/end_subpass helpers

2018-02-05 Thread Jason Ekstrand
Having begin/end_subpass is a bit nicer than the begin/next/end hooks
that Vulkan gives us.
---
 src/intel/vulkan/genX_cmd_buffer.c | 55 +-
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index c234300..3e37eaf 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3432,10 +3432,11 @@ cmd_buffer_subpass_sync_fast_clear_values(struct 
anv_cmd_buffer *cmd_buffer)
 
 
 static void
-genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer,
- struct anv_subpass *subpass)
+cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_subpass *subpass)
 {
cmd_buffer->state.subpass = subpass;
+   uint32_t subpass_id = anv_get_subpass_id(_buffer->state);
 
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
 
@@ -3460,6 +3461,10 @@ genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer 
*cmd_buffer,
 */
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
 
+   /* Accumulate any subpass flushes that need to happen before the subpass */
+   cmd_buffer->state.pending_pipe_bits |=
+  cmd_buffer->state.pass->subpass_flushes[subpass_id];
+
/* Perform transitions to the subpass layout before any writes have
 * occurred.
 */
@@ -3479,6 +3484,26 @@ genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer 
*cmd_buffer,
anv_cmd_buffer_clear_subpass(cmd_buffer);
 }
 
+static void
+cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
+{
+   uint32_t subpass_id = anv_get_subpass_id(_buffer->state);
+
+   anv_cmd_buffer_resolve_subpass(cmd_buffer);
+
+   /* Perform transitions to the final layout after all writes have occurred.
+*/
+   cmd_buffer_subpass_transition_layouts(cmd_buffer, true);
+
+   /* Accumulate any subpass flushes that need to happen after the subpass.
+* Yes, they do get accumulated twice in the NextSubpass case but since
+* genX_CmdNextSubpass just calls end/begin back-to-back, we just end up
+* ORing the bits in twice so it's harmless.
+*/
+   cmd_buffer->state.pending_pipe_bits |=
+  cmd_buffer->state.pass->subpass_flushes[subpass_id + 1];
+}
+
 void genX(CmdBeginRenderPass)(
 VkCommandBuffer commandBuffer,
 const VkRenderPassBeginInfo*pRenderPassBegin,
@@ -3502,10 +3527,7 @@ void genX(CmdBeginRenderPass)(
 
genX(flush_pipeline_select_3d)(cmd_buffer);
 
-   cmd_buffer->state.pending_pipe_bits |=
-  cmd_buffer->state.pass->subpass_flushes[0];
-
-   genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses);
+   cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
 }
 
 void genX(CmdNextSubpass)(
@@ -3519,17 +3541,9 @@ void genX(CmdNextSubpass)(
 
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
 
-   anv_cmd_buffer_resolve_subpass(cmd_buffer);
-
-   /* Perform transitions to the final layout after all writes have occurred.
-*/
-   cmd_buffer_subpass_transition_layouts(cmd_buffer, true);
-
-   uint32_t subpass_id = anv_get_subpass_id(_buffer->state);
-   cmd_buffer->state.pending_pipe_bits |=
-  cmd_buffer->state.pass->subpass_flushes[subpass_id];
+   cmd_buffer_end_subpass(cmd_buffer);
 
-   genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1);
+   cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
 }
 
 void genX(CmdEndRenderPass)(
@@ -3540,14 +3554,7 @@ void genX(CmdEndRenderPass)(
if (anv_batch_has_error(_buffer->batch))
   return;
 
-   anv_cmd_buffer_resolve_subpass(cmd_buffer);
-
-   /* Perform transitions to the final layout after all writes have occurred.
-*/
-   cmd_buffer_subpass_transition_layouts(cmd_buffer, true);
-
-   cmd_buffer->state.pending_pipe_bits |=
-  
cmd_buffer->state.pass->subpass_flushes[cmd_buffer->state.pass->subpass_count];
+   cmd_buffer_end_subpass(cmd_buffer);
 
cmd_buffer->state.hiz_enabled = false;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/14] anv/cmd_buffer: Mark depth/stencil surfaces written in begin_subpass

2018-02-05 Thread Jason Ekstrand
---
 src/intel/vulkan/genX_cmd_buffer.c | 50 ++
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 4eee85a..2d17c28 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3255,27 +3255,6 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
*cmd_buffer)
isl_emit_depth_stencil_hiz_s(>isl_dev, dw, );
 
cmd_buffer->state.hiz_enabled = info.hiz_usage == ISL_AUX_USAGE_HIZ;
-
-   /* We may be writing depth or stencil so we need to mark the surface.
-* Unfortunately, there's no way to know at this point whether the depth or
-* stencil tests used will actually write to the surface.
-*/
-   if (image && (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
-  genX(cmd_buffer_mark_image_written)(cmd_buffer, image,
-  VK_IMAGE_ASPECT_DEPTH_BIT,
-  info.hiz_usage,
-  info.view->base_level,
-  info.view->base_array_layer,
-  info.view->array_len);
-   }
-   if (image && (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) {
-  genX(cmd_buffer_mark_image_written)(cmd_buffer, image,
-  VK_IMAGE_ASPECT_STENCIL_BIT,
-  ISL_AUX_USAGE_NONE,
-  info.view->base_level,
-  info.view->base_array_layer,
-  info.view->array_len);
-   }
 }
 
 
@@ -3550,6 +3529,35 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer 
*cmd_buffer,
  iview->planes[0].isl.base_level,
  
iview->planes[0].isl.base_array_layer,
  fb->layers);
+  } else if (subpass->attachments[i].usage ==
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+ /* We may be writing depth or stencil so we need to mark the surface.
+  * Unfortunately, there's no way to know at this point whether the
+  * depth or stencil tests used will actually write to the surface.
+  *
+  * Even though stencil may be plane 1, it always shares a base_level
+  * with depth.
+  */
+ const struct isl_view *ds_view = >planes[0].isl;
+ if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
+genX(cmd_buffer_mark_image_written)(cmd_buffer, image,
+VK_IMAGE_ASPECT_DEPTH_BIT,
+att_state->aux_usage,
+ds_view->base_level,
+ds_view->base_array_layer,
+fb->layers);
+ }
+ if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+/* Even though stencil may be plane 1, it always shares a
+ * base_level with depth.
+ */
+genX(cmd_buffer_mark_image_written)(cmd_buffer, image,
+VK_IMAGE_ASPECT_STENCIL_BIT,
+ISL_AUX_USAGE_NONE,
+ds_view->base_level,
+ds_view->base_array_layer,
+fb->layers);
+ }
   }
 
   att_state->pending_clear_aspects = 0;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/14] anv/cmd_buffer: Pass a subpass id into begin_subpass

2018-02-05 Thread Jason Ekstrand
This is a bit less awkward than passing in the subpass because it means
we don't have to extract the subpass id from the subpass.
---
 src/intel/vulkan/genX_cmd_buffer.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 3e37eaf..519d14f 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3430,13 +3430,11 @@ cmd_buffer_subpass_sync_fast_clear_values(struct 
anv_cmd_buffer *cmd_buffer)
}
 }
 
-
 static void
 cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
- struct anv_subpass *subpass)
+ uint32_t subpass_id)
 {
-   cmd_buffer->state.subpass = subpass;
-   uint32_t subpass_id = anv_get_subpass_id(_buffer->state);
+   cmd_buffer->state.subpass = _buffer->state.pass->subpasses[subpass_id];
 
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
 
@@ -3527,7 +3525,7 @@ void genX(CmdBeginRenderPass)(
 
genX(flush_pipeline_select_3d)(cmd_buffer);
 
-   cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
+   cmd_buffer_begin_subpass(cmd_buffer, 0);
 }
 
 void genX(CmdNextSubpass)(
@@ -3541,9 +3539,9 @@ void genX(CmdNextSubpass)(
 
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
 
+   uint32_t prev_subpass = anv_get_subpass_id(_buffer->state);
cmd_buffer_end_subpass(cmd_buffer);
-
-   cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
+   cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
 }
 
 void genX(CmdEndRenderPass)(
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/14] anv/cmd_buffer: Sync clear values in begin_subpass

2018-02-05 Thread Jason Ekstrand
This is quite a bit cleaner because we now sync the clear values at the
same time as we do the fast clear.  For loading the clear values into
the surface state, we now do it once when we handle the LOAD_OP_LOAD
instead of every subpass.
---
 src/intel/vulkan/genX_cmd_buffer.c | 148 -
 1 file changed, 48 insertions(+), 100 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index f92e86f..4eee85a 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3392,97 +3392,6 @@ cmd_buffer_subpass_transition_layouts(struct 
anv_cmd_buffer * const cmd_buffer,
}
 }
 
-/* Update the clear value dword(s) in surface state objects or the fast clear
- * state buffer entry for the color attachments used in this subpass.
- */
-static void
-cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer)
-{
-   assert(cmd_buffer && cmd_buffer->state.subpass);
-
-   const struct anv_cmd_state *state = &cmd_buffer->state;
-
-   /* Iterate through every color attachment used in this subpass. */
-   for (uint32_t i = 0; i < state->subpass->color_count; ++i) {
-
-  /* The attachment should be one of the attachments described in the
-   * render pass and used in the subpass.
-   */
-  const uint32_t a = state->subpass->color_attachments[i].attachment;
-  if (a == VK_ATTACHMENT_UNUSED)
- continue;
-
-  assert(a < state->pass->attachment_count);
-
-  /* Store some information regarding this attachment. */
-  const struct anv_attachment_state *att_state = &state->attachments[a];
-  const struct anv_image_view *iview = state->framebuffer->attachments[a];
-  const struct anv_render_pass_attachment *rp_att =
- &state->pass->attachments[a];
-
-  if (att_state->aux_usage == ISL_AUX_USAGE_NONE)
- continue;
-
-  /* The fast clear state entry must be updated if a fast clear is going to
-   * happen. The surface state must be updated if the clear value from a
-   * prior fast clear may be needed.
-   */
-  if (att_state->pending_clear_aspects && att_state->fast_clear) {
- /* Update the fast clear state entry. */
- genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state,
-  iview->image,
-  VK_IMAGE_ASPECT_COLOR_BIT,
-  true /* copy from ss */);
-
- /* Fast-clears impact whether or not a resolve will be necessary. */
- if (att_state->clear_color_is_zero) {
-/* This image always has the auxiliary buffer enabled. We can mark
- * the subresource as not needing a resolve because the clear color
- * will match what's in every RENDER_SURFACE_STATE object when it's
- * being used for sampling.
- */
-set_image_fast_clear_state(cmd_buffer, iview->image,
-   VK_IMAGE_ASPECT_COLOR_BIT,
-   ANV_FAST_CLEAR_DEFAULT_VALUE);
- } else {
-set_image_fast_clear_state(cmd_buffer, iview->image,
-   VK_IMAGE_ASPECT_COLOR_BIT,
-   ANV_FAST_CLEAR_ANY);
- }
-  } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD &&
- iview->planes[0].isl.base_level == 0 &&
- iview->planes[0].isl.base_array_layer == 0) {
- /* The attachment may have been fast-cleared in a previous render
-  * pass and the value is needed now. Update the surface state(s).
-  *
-  * TODO: Do this only once per render pass instead of every subpass.
-  */
- genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state,
-  iview->image,
-  VK_IMAGE_ASPECT_COLOR_BIT,
-  false /* copy to ss */);
-
- if (need_input_attachment_state(rp_att) &&
- att_state->input_aux_usage != ISL_AUX_USAGE_NONE) {
-genX(copy_fast_clear_dwords)(cmd_buffer, att_state->input.state,
- iview->image,
- VK_IMAGE_ASPECT_COLOR_BIT,
- false /* copy to ss */);
- }
-  }
-
-  /* We assume that if we're starting a subpass, we're going to do some
-   * rendering so we may end up with compressed data.
-   */
-  genX(cmd_buffer_mark_image_written)(cmd_buffer, iview->image,
-  VK_IMAGE_ASPECT_COLOR_BIT,
-  att_state->aux_usage,
-  iview->planes[0].isl.base_level,
-  
iview->planes[0].isl.base_array_layer,
-   

[Mesa-dev] [PATCH 05/14] intel/blorp: Add a blorp_hiz_clear_depth_stencil helper

2018-02-05 Thread Jason Ekstrand
This is similar to blorp_gen8_hiz_clear_attachments except that it takes
actual images instead of trusting in the already set depth state.
---
 src/intel/blorp/blorp.h   | 11 ++
 src/intel/blorp/blorp_clear.c | 50 +++
 2 files changed, 61 insertions(+)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index ce3762c..4626f2f 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -170,6 +170,17 @@ blorp_can_hiz_clear_depth(uint8_t gen, enum isl_format 
format,
   uint32_t num_samples,
   uint32_t x0, uint32_t y0,
   uint32_t x1, uint32_t y1);
+void
+blorp_hiz_clear_depth_stencil(struct blorp_batch *batch,
+  const struct blorp_surf *depth,
+  const struct blorp_surf *stencil,
+  uint32_t level,
+  uint32_t start_layer, uint32_t num_layers,
+  uint32_t x0, uint32_t y0,
+  uint32_t x1, uint32_t y1,
+  bool clear_depth, float depth_value,
+  bool clear_stencil, uint8_t stencil_value);
+
 
 void
 blorp_gen8_hiz_clear_attachments(struct blorp_batch *batch,
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index 32ec31b..ccbbda0 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -612,6 +612,56 @@ blorp_can_hiz_clear_depth(uint8_t gen, enum isl_format 
format,
return true;
 }
 
+void
+blorp_hiz_clear_depth_stencil(struct blorp_batch *batch,
+  const struct blorp_surf *depth,
+  const struct blorp_surf *stencil,
+  uint32_t level,
+  uint32_t start_layer, uint32_t num_layers,
+  uint32_t x0, uint32_t y0,
+  uint32_t x1, uint32_t y1,
+  bool clear_depth, float depth_value,
+  bool clear_stencil, uint8_t stencil_value)
+{
+   struct blorp_params params;
+   blorp_params_init(&params);
+
+   /* This requires WM_HZ_OP which only exists on gen8+ */
+   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 8);
+
+   params.hiz_op = ISL_AUX_OP_FAST_CLEAR;
+   params.num_layers = 1;
+
+   params.x0 = x0;
+   params.y0 = y0;
+   params.x1 = x1;
+   params.y1 = y1;
+
+   for (uint32_t l = 0; l < num_layers; l++) {
+  const uint32_t layer = start_layer + l;
+  if (clear_stencil) {
+ brw_blorp_surface_info_init(batch->blorp, &params.stencil, stencil,
+ level, layer,
+ ISL_FORMAT_UNSUPPORTED, true);
+ params.stencil_mask = 0xff;
+ params.stencil_ref = stencil_value;
+ params.num_samples = params.stencil.surf.samples;
+  }
+
+  if (clear_depth) {
+ brw_blorp_surface_info_init(batch->blorp, &params.depth, depth,
+ level, layer,
+ ISL_FORMAT_UNSUPPORTED, true);
+ params.depth.clear_color.f32[0] = depth_value;
+ params.depth_format =
+isl_format_get_depth_format(depth->surf->format, false);
+ params.num_samples = params.depth.surf.samples;
+  }
+
+  batch->blorp->exec(batch, &params);
+   }
+}
+
 /* Given a depth stencil attachment, this function performs a fast depth clear
  * on a depth portion and a regular clear on the stencil portion. When
  * performing a fast depth clear on the depth portion, the HiZ buffer is simply
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/14] anv/cmd_buffer: Add a concept of pending load aspects

2018-02-05 Thread Jason Ekstrand
These are the same as pending clear aspects only for the "load"
operation.
---
 src/intel/vulkan/anv_private.h |  1 +
 src/intel/vulkan/genX_cmd_buffer.c | 22 --
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 906c6f3..d424498 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1678,6 +1678,7 @@ struct anv_attachment_state {
 
VkImageLayoutcurrent_layout;
VkImageAspectFlags   pending_clear_aspects;
+   VkImageAspectFlags   pending_load_aspects;
bool fast_clear;
VkClearValue clear_value;
bool clear_color_is_zero_one;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 608f5ee..2590ea3 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1123,26 +1123,36 @@ genX(cmd_buffer_setup_attachments)(struct 
anv_cmd_buffer *cmd_buffer,
  struct anv_render_pass_attachment *att = &pass->attachments[i];
  VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
  VkImageAspectFlags clear_aspects = 0;
+ VkImageAspectFlags load_aspects = 0;
 
  if (att_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
 /* color attachment */
 if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
+} else if (att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
+   load_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
 }
  } else {
 /* depthstencil attachment */
-if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
-att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
-   clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
+if (att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
+   if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+  clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
+   } else if (att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
+  load_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
+   }
 }
-if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
-att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
-   clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+if (att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+   if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+  clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+   } else if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
+  load_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+   }
 }
  }
 
  state->attachments[i].current_layout = att->initial_layout;
  state->attachments[i].pending_clear_aspects = clear_aspects;
+ state->attachments[i].pending_load_aspects = load_aspects;
  if (clear_aspects)
 state->attachments[i].clear_value = begin->pClearValues[i];
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/14] anv/pass: Store usage in each subpass attachment

2018-02-05 Thread Jason Ekstrand
This requires us to ditch the VkAttachmentReference struct in favor of
an anv-specific struct.  However, we can now easily identify from just
the subpass attachment what kind of an attachment it is.  This will make
iteration over anv_subpass::attachments a little easier in some cases.
---
 src/intel/vulkan/anv_pass.c| 35 +++
 src/intel/vulkan/anv_private.h | 16 +++-
 src/intel/vulkan/genX_cmd_buffer.c |  2 +-
 3 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
index a77e52b..5b8b138 100644
--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -65,7 +65,7 @@ VkResult anv_CreateRenderPass(
anv_multialloc_add(&ma, &attachments, pCreateInfo->attachmentCount);
anv_multialloc_add(&ma, &subpass_flushes, pCreateInfo->subpassCount + 1);
 
-   VkAttachmentReference *subpass_attachments;
+   struct anv_subpass_attachment *subpass_attachments;
uint32_t subpass_attachment_count = 0;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
   subpass_attachment_count +=
@@ -117,7 +117,11 @@ VkResult anv_CreateRenderPass(
 
  for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
 uint32_t a = desc->pInputAttachments[j].attachment;
-subpass->input_attachments[j] = desc->pInputAttachments[j];
+subpass->input_attachments[j] = (struct anv_subpass_attachment) {
+   .usage =   VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
+   .attachment =  desc->pInputAttachments[j].attachment,
+   .layout =  desc->pInputAttachments[j].layout,
+};
 if (a != VK_ATTACHMENT_UNUSED) {
has_input = true;
pass->attachments[a].usage |= 
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
@@ -138,7 +142,11 @@ VkResult anv_CreateRenderPass(
 
  for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
 uint32_t a = desc->pColorAttachments[j].attachment;
-subpass->color_attachments[j] = desc->pColorAttachments[j];
+subpass->color_attachments[j] = (struct anv_subpass_attachment) {
+   .usage =   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+   .attachment =  desc->pColorAttachments[j].attachment,
+   .layout =  desc->pColorAttachments[j].layout,
+};
 if (a != VK_ATTACHMENT_UNUSED) {
has_color = true;
pass->attachments[a].usage |= 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
@@ -157,7 +165,11 @@ VkResult anv_CreateRenderPass(
 
  for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
 uint32_t a = desc->pResolveAttachments[j].attachment;
-subpass->resolve_attachments[j] = desc->pResolveAttachments[j];
+subpass->resolve_attachments[j] = (struct anv_subpass_attachment) {
+   .usage =   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+   .attachment =  desc->pResolveAttachments[j].attachment,
+   .layout =  desc->pResolveAttachments[j].layout,
+};
 if (a != VK_ATTACHMENT_UNUSED) {
subpass->has_resolve = true;
uint32_t color_att = desc->pColorAttachments[j].attachment;
@@ -174,8 +186,12 @@ VkResult anv_CreateRenderPass(
 
   if (desc->pDepthStencilAttachment) {
  uint32_t a = desc->pDepthStencilAttachment->attachment;
- *subpass_attachments++ = subpass->depth_stencil_attachment =
-*desc->pDepthStencilAttachment;
+ subpass->depth_stencil_attachment = (struct anv_subpass_attachment) {
+.usage =   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+.attachment =  desc->pDepthStencilAttachment->attachment,
+.layout =  desc->pDepthStencilAttachment->layout,
+ };
+ *subpass_attachments++ = subpass->depth_stencil_attachment;
  if (a != VK_ATTACHMENT_UNUSED) {
 has_depth = true;
 pass->attachments[a].usage |=
@@ -186,8 +202,11 @@ VkResult anv_CreateRenderPass(
   *desc->pDepthStencilAttachment);
  }
   } else {
- subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
- subpass->depth_stencil_attachment.layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ subpass->depth_stencil_attachment = (struct anv_subpass_attachment) {
+.usage =   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+.attachment =  VK_ATTACHMENT_UNUSED,
+.layout =   VK_IMAGE_LAYOUT_UNDEFINED,
+ };
   }
}
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index d424498..9a8da2b 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2865,6 +2865,12 @@ struct anv_framebuffer {
struct anv_image_view *  attachments[0];
 };
 
+struct anv_subpass_attachment {
+ 

[Mesa-dev] [PATCH 08/14] anv/cmd_buffer: Iterate all subpass attachments when clearing

2018-02-05 Thread Jason Ekstrand
This unifies things a bit because we now handle depth and stencil at the
same time.  It also ensures that clears happen for input attachments.
---
 src/intel/vulkan/genX_cmd_buffer.c | 77 --
 1 file changed, 32 insertions(+), 45 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index ab79fbf..608f5ee 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3524,66 +3524,51 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer 
*cmd_buffer,
 
VkRect2D render_area = cmd_buffer->state.render_area;
struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
-   for (uint32_t i = 0; i < subpass->color_count; ++i) {
-  const uint32_t a = subpass->color_attachments[i].attachment;
+
+   for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+  const uint32_t a = subpass->attachments[i].attachment;
   if (a == VK_ATTACHMENT_UNUSED)
  continue;
 
   assert(a < cmd_state->pass->attachment_count);
   struct anv_attachment_state *att_state = &cmd_state->attachments[a];
 
-  if (!att_state->pending_clear_aspects)
- continue;
-
-  assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
-
   struct anv_image_view *iview = fb->attachments[a];
   const struct anv_image *image = iview->image;
 
-  /* Multi-planar images are not supported as attachments */
-  assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
-  assert(image->n_planes == 1);
-
-  uint32_t base_layer = iview->planes[0].isl.base_array_layer;
-  uint32_t layer_count = fb->layers;
+  if (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
+ assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
 
-  if (att_state->fast_clear) {
- /* We only support fast-clears on the first layer */
- assert(iview->planes[0].isl.base_level == 0);
- assert(iview->planes[0].isl.base_array_layer == 0);
-
- anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
-  0, 0, 1, ISL_AUX_OP_FAST_CLEAR, false);
- base_layer++;
- layer_count--;
-  }
-
-  if (layer_count > 0) {
+ /* Multi-planar images are not supported as attachments */
+ assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
  assert(image->n_planes == 1);
- anv_image_clear_color(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
-   att_state->aux_usage,
-   iview->planes[0].isl.format,
-   iview->planes[0].isl.swizzle,
-   iview->planes[0].isl.base_level,
-   base_layer, layer_count, render_area,
-   vk_to_isl_color(att_state->clear_value.color));
-  }
-
-  att_state->pending_clear_aspects = 0;
-   }
 
-   if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
-  const uint32_t a = subpass->depth_stencil_attachment.attachment;
+ uint32_t base_layer = iview->planes[0].isl.base_array_layer;
+ uint32_t layer_count = fb->layers;
 
-  assert(a < cmd_state->pass->attachment_count);
-  struct anv_attachment_state *att_state = &cmd_state->attachments[a];
-  struct anv_image_view *iview = fb->attachments[a];
-  const struct anv_image *image = iview->image;
+ if (att_state->fast_clear) {
+/* We only support fast-clears on the first layer */
+assert(iview->planes[0].isl.base_level == 0);
+assert(iview->planes[0].isl.base_array_layer == 0);
 
-  assert(image->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
-   VK_IMAGE_ASPECT_STENCIL_BIT));
+anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
+ 0, 0, 1, ISL_AUX_OP_FAST_CLEAR, false);
+base_layer++;
+layer_count--;
+ }
 
-  if (att_state->pending_clear_aspects) {
+ if (layer_count > 0) {
+assert(image->n_planes == 1);
+anv_image_clear_color(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
+  att_state->aux_usage,
+  iview->planes[0].isl.format,
+  iview->planes[0].isl.swizzle,
+  iview->planes[0].isl.base_level,
+  base_layer, layer_count, render_area,
+  
vk_to_isl_color(att_state->clear_value.color));
+ }
+  } else if (att_state->pending_clear_aspects & (VK_IMAGE_ASPECT_DEPTH_BIT 
|
+ 
VK_IMAGE_ASPECT_STENCIL_BIT)) {
  if (att_state->fast_clear) {
 /* We currently only support HiZ for single-layer images */
 

[Mesa-dev] [PATCH 06/14] anv/cmd_buffer: Move the rest of clear_subpass into begin_subpass

2018-02-05 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_blorp.c   | 243 -
 src/intel/vulkan/anv_private.h |  17 ++-
 src/intel/vulkan/genX_cmd_buffer.c |  68 ++-
 3 files changed, 188 insertions(+), 140 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index fd32227..4018476 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1136,143 +1136,6 @@ enum subpass_stage {
SUBPASS_STAGE_RESOLVE,
 };
 
-static bool
-subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
-{
-   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
-   uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;
-
-   if (ds != VK_ATTACHMENT_UNUSED) {
-  assert(ds < cmd_state->pass->attachment_count);
-  if (cmd_state->attachments[ds].pending_clear_aspects)
- return true;
-   }
-
-   return false;
-}
-
-void
-anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
-{
-   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
-   const VkRect2D render_area = cmd_buffer->state.render_area;
-
-
-   if (!subpass_needs_clear(cmd_buffer))
-  return;
-
-   /* Because this gets called within a render pass, we tell blorp not to
-* trash our depth and stencil buffers.
-*/
-   struct blorp_batch batch;
-   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
-BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
-
-   VkClearRect clear_rect = {
-  .rect = cmd_buffer->state.render_area,
-  .baseArrayLayer = 0,
-  .layerCount = cmd_buffer->state.framebuffer->layers,
-   };
-
-   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
-
-   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;
-   assert(ds == VK_ATTACHMENT_UNUSED || ds < 
cmd_state->pass->attachment_count);
-
-   if (ds != VK_ATTACHMENT_UNUSED &&
-   cmd_state->attachments[ds].pending_clear_aspects) {
-
-  VkClearAttachment clear_att = {
- .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
- .clearValue = cmd_state->attachments[ds].clear_value,
-  };
-
-
-  const uint8_t gen = cmd_buffer->device->info.gen;
-  bool clear_with_hiz = gen >= 8 && cmd_state->attachments[ds].aux_usage ==
-ISL_AUX_USAGE_HIZ;
-  const struct anv_image_view *iview = fb->attachments[ds];
-
-  if (clear_with_hiz) {
- const bool clear_depth = clear_att.aspectMask &
-  VK_IMAGE_ASPECT_DEPTH_BIT;
- const bool clear_stencil = clear_att.aspectMask &
-VK_IMAGE_ASPECT_STENCIL_BIT;
-
- /* Check against restrictions for depth buffer clearing. A great GPU
-  * performance benefit isn't expected when using the HZ sequence for
-  * stencil-only clears. Therefore, we don't emit a HZ op sequence for
-  * a stencil clear in addition to using the BLORP-fallback for depth.
-  */
- if (clear_depth) {
-if (!blorp_can_hiz_clear_depth(gen, iview->planes[0].isl.format,
-   iview->image->samples,
-   render_area.offset.x,
-   render_area.offset.y,
-   render_area.offset.x +
-   render_area.extent.width,
-   render_area.offset.y +
-   render_area.extent.height)) {
-   clear_with_hiz = false;
-} else if (clear_att.clearValue.depthStencil.depth !=
-   ANV_HZ_FC_VAL) {
-   /* Don't enable fast depth clears for any color not equal to
-* ANV_HZ_FC_VAL.
-*/
-   clear_with_hiz = false;
-} else if (gen == 8 &&
-   anv_can_sample_with_hiz(&cmd_buffer->device->info,
-   iview->image)) {
-   /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
-* fast-cleared portion of a HiZ buffer. Testing has revealed
-* that Gen8 only supports returning 0.0f. Gens prior to gen8 do
-* not support this feature at all.
-*/
-   clear_with_hiz = false;
-}
- }
-
- if (clear_with_hiz) {
-blorp_gen8_hiz_clear_attachments(&batch, iview->image->samples,
- render_area.offset.x,
- render_area.offset.y,
- render_area.offset.x +
- render_area.extent.width,
- render_area.offset.y +
- render_area.extent.height,
-  

[Mesa-dev] [PATCH 00/14] anv: Rework subpass resolves and clears

2018-02-05 Thread Jason Ekstrand
This little series just shuffles code around to make things a bit more
clear.  (At least I think it does!)  The basic idea is to split set_subpass
into begin_subpass and end_subpass and then move all of the resolve and clear
code into those two helpers.  This means that we no longer have 3 or 4
different functions that all loop over attachments which we have to call in
just the right order.

The last patch in this series takes advantage of this new structure by
allowing us to avoid the transition from UNDEFINED when LOAD_OP_CLEAR is
specified.  I highly doubt the performance impact of this will be noticeable
but it's nice to be able to do.

Jason Ekstrand (14):
  anv/cmd_buffer: Apply subpass flushes before set_subpass
  anv/cmd_buffer: Add begin/end_subpass helpers
  anv/cmd_buffer: Pass a subpass id into begin_subpass
  anv/cmd_buffer: Move the color portion of clear_subpass into
begin_subpass
  intel/blorp: Add a blorp_hiz_clear_depth_stencil helper
  anv/cmd_buffer: Move the rest of clear_subpass into begin_subpass
  anv/cmd_buffer: Decide whether or not to HiZ clear up-front
  anv/cmd_buffer: Iterate all subpass attachments when clearing
  anv/cmd_buffer: Add a concept of pending load aspects
  anv/pass: Store usage in each subpass attachment
  anv/cmd_buffer: Sync clear values in begin_subpass
  anv/cmd_buffer: Mark depth/stencil surfaces written in begin_subpass
  anv/cmd_buffer: Do subpass image transitions in begin/end_subpass
  anv/cmd_buffer: Avoid unnecessary transitions before fast clears

 src/intel/blorp/blorp.h|  11 +
 src/intel/blorp/blorp_clear.c  |  50 
 src/intel/vulkan/anv_blorp.c   | 367 ++--
 src/intel/vulkan/anv_pass.c|  35 ++-
 src/intel/vulkan/anv_private.h |  42 ++-
 src/intel/vulkan/genX_cmd_buffer.c | 555 -
 6 files changed, 563 insertions(+), 497 deletions(-)

-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: rename gl_vertex_array_object::_VertexAttrib -> _VertexArray

2018-02-05 Thread Mathias Fröhlich
Hi Brian,

On Monday, 5 February 2018 18:23:17 CET Brian Paul wrote:
> Since the type is gl_vertex_array.  Update comment to explain that
> these arrays are only used by the VBO module.
> 
> Also rename some local variables in _mesa_update_vao_derived_arrays().

Makes sense to me and the change looks good.

Reviewed-by: Mathias Fröhlich 

best
Mathias

> ---
>  src/mesa/main/arrayobj.c  | 13 ++---
>  src/mesa/main/attrib.c|  2 +-
>  src/mesa/main/mtypes.h|  4 ++--
>  src/mesa/vbo/vbo_exec_array.c |  2 +-
>  4 files changed, 10 insertions(+), 11 deletions(-)
> 
> diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
> index 360d097..a6fa33c 100644
> --- a/src/mesa/main/arrayobj.c
> +++ b/src/mesa/main/arrayobj.c
> @@ -283,8 +283,8 @@ unbind_array_object_vbos(struct gl_context *ctx, struct 
gl_vertex_array_object *
> for (i = 0; i < ARRAY_SIZE(obj->BufferBinding); i++)
>_mesa_reference_buffer_object(ctx, &obj->BufferBinding[i].BufferObj, 
NULL);
>  
> -   for (i = 0; i < ARRAY_SIZE(obj->_VertexAttrib); i++)
> -  _mesa_reference_buffer_object(ctx, &obj->_VertexAttrib[i].BufferObj, 
NULL);
> +   for (i = 0; i < ARRAY_SIZE(obj->_VertexArray); i++)
> +  _mesa_reference_buffer_object(ctx, &obj->_VertexArray[i].BufferObj, 
NULL);
>  }
>  
>  
> @@ -453,14 +453,13 @@ _mesa_update_vao_derived_arrays(struct gl_context 
*ctx,
>  
> while (arrays) {
>const int attrib = u_bit_scan(&arrays);
> -  struct gl_vertex_array *client_array = &vao->_VertexAttrib[attrib];
> -  const struct gl_array_attributes *attrib_array =
> +  struct gl_vertex_array *array = &vao->_VertexArray[attrib];
> +  const struct gl_array_attributes *attribs =
>   &vao->VertexAttrib[attrib];
>const struct gl_vertex_buffer_binding *buffer_binding =
> - &vao->BufferBinding[attrib_array->BufferBindingIndex];
> + &vao->BufferBinding[attribs->BufferBindingIndex];
>  
> -  _mesa_update_vertex_array(ctx, client_array, attrib_array,
> -buffer_binding);
> +  _mesa_update_vertex_array(ctx, array, attribs, buffer_binding);
> }
>  }
>  
> diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
> index a9e4a11..8ac5db0 100644
> --- a/src/mesa/main/attrib.c
> +++ b/src/mesa/main/attrib.c
> @@ -1503,7 +1503,7 @@ copy_array_object(struct gl_context *ctx,
> /* skip RefCount */
>  
> for (i = 0; i < ARRAY_SIZE(src->VertexAttrib); i++) {
> -  _mesa_copy_vertex_array(ctx, >_VertexAttrib[i], 
>_VertexAttrib[i]);
> +  _mesa_copy_vertex_array(ctx, >_VertexArray[i], 
>_VertexArray[i]);
>_mesa_copy_vertex_attrib_array(ctx, >VertexAttrib[i], 
>VertexAttrib[i]);
>_mesa_copy_vertex_buffer_binding(ctx, >BufferBinding[i], 
>BufferBinding[i]);
> }
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 3a67d43..aa083c3 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -1603,9 +1603,9 @@ struct gl_vertex_array_object
>  * Derived vertex attribute arrays
>  *
>  * This is a legacy data structure created from gl_array_attributes and
> -* gl_vertex_buffer_binding, for compatibility with existing driver 
code.
> +* gl_vertex_buffer_binding, only used by the VBO module at this time.
>  */
> -   struct gl_vertex_array _VertexAttrib[VERT_ATTRIB_MAX];
> +   struct gl_vertex_array _VertexArray[VERT_ATTRIB_MAX];
>  
> /** Vertex attribute arrays */
> struct gl_array_attributes VertexAttrib[VERT_ATTRIB_MAX];
> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
> index 42759d5..a5bedc8 100644
> --- a/src/mesa/vbo/vbo_exec_array.c
> +++ b/src/mesa/vbo/vbo_exec_array.c
> @@ -314,7 +314,7 @@ recalculate_input_bindings(struct gl_context *ctx)
> struct vbo_context *vbo = vbo_context(ctx);
> struct vbo_exec_context *exec = &vbo->exec;
> const struct gl_vertex_array_object *vao = ctx->Array.VAO;
> -   const struct gl_vertex_array *vertexAttrib = vao->_VertexAttrib;
> +   const struct gl_vertex_array *vertexAttrib = vao->_VertexArray;
> const struct gl_vertex_array **inputs = &exec->array.inputs[0];
>  
> /* May shuffle the position and generic0 bits around */
> 




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] mesa: Mute arrays for Bitmap/CopyPixels/DrawPixels callbacks.

2018-02-05 Thread Mathias . Froehlich
From: Mathias Fröhlich 

Set the _DrawArray pointer to NULL when calling into the Drivers
Bitmap/CopyPixels/DrawPixels hooks. This fixes an assert that gets
uncovered when the following patch gets applied.

Signed-off-by: Mathias Fröhlich 
---
 src/mesa/main/drawpix.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/mesa/main/drawpix.c b/src/mesa/main/drawpix.c
index ec1d2618ca..05a18d3e51 100644
--- a/src/mesa/main/drawpix.c
+++ b/src/mesa/main/drawpix.c
@@ -37,6 +37,7 @@
 #include "dispatch.h"
 #include "glformats.h"
 #include "fbobject.h"
+#include "varray.h"
 
 
 /*
@@ -72,6 +73,9 @@ _mesa_DrawPixels( GLsizei width, GLsizei height,
 */
_mesa_set_vp_override(ctx, GL_TRUE);
 
+   /* Prevent drivers from accessing stale draw array data */
+   _mesa_set_drawing_arrays(ctx, NULL);
+
/* Note: this call does state validation */
if (!_mesa_valid_to_render(ctx, "glDrawPixels")) {
   goto end;  /* the error code was recorded */
@@ -228,6 +232,9 @@ _mesa_CopyPixels( GLint srcx, GLint srcy, GLsizei width, 
GLsizei height,
 */
_mesa_set_vp_override(ctx, GL_TRUE);
 
+   /* Prevent drivers from accessing stale draw array data */
+   _mesa_set_drawing_arrays(ctx, NULL);
+
/* Note: this call does state validation */
if (!_mesa_valid_to_render(ctx, "glCopyPixels")) {
   goto end;  /* the error code was recorded */
@@ -320,6 +327,9 @@ _mesa_Bitmap( GLsizei width, GLsizei height,
if (ctx->RasterDiscard)
   return;
 
+   /* Prevent drivers from accessing stale draw array data */
+   _mesa_set_drawing_arrays(ctx, NULL);
+
if (ctx->RenderMode == GL_RENDER) {
   /* Truncate, to satisfy conformance tests (matches SGI's OpenGL). */
   if (width > 0 && height > 0) {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] mesa: Fix VAO buffer object tracking.

2018-02-05 Thread Mathias . Froehlich
From: Mathias Fröhlich 

When changing the attribute binding in the VAO we also need to
account for getting rid of non vbo bits from VertexAttribBufferMask.

Signed-off-by: Mathias Fröhlich 
Reviewed-by: Brian Paul 
---
 src/mesa/main/varray.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index 81b8fbe8ca..2fd9de630f 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -142,6 +142,8 @@ vertex_attrib_binding(struct gl_context *ctx,
 
   if (_mesa_is_bufferobj(vao->BufferBinding[bindingIndex].BufferObj))
  vao->VertexAttribBufferMask |= array_bit;
+  else
+ vao->VertexAttribBufferMask &= ~array_bit;
 
   FLUSH_VERTICES(ctx, _NEW_ARRAY);
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] mesa: Only update enabled VAO gl_vertex_array entries.

2018-02-05 Thread Mathias . Froehlich
From: Mathias Fröhlich 

Instead of updating all modified gl_vertex_array_object::_VertexArray
entries just update those that are modified and enabled.
Also release buffer objects from the _VertexArray entries that belong
to disabled attributes.

v2: Also set Ptr and Size to zero.

Signed-off-by: Mathias Fröhlich 
Reviewed-by: Brian Paul 
---
 src/mesa/main/varray.c |  8 
 src/mesa/main/varray.h | 29 ++---
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index 2fd9de630f..a2d1d74798 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -152,7 +152,7 @@ vertex_attrib_binding(struct gl_context *ctx,
 
   array->BufferBindingIndex = bindingIndex;
 
-  vao->NewArrays |= array_bit;
+  vao->NewArrays |= vao->_Enabled & array_bit;
}
 }
 
@@ -187,7 +187,7 @@ _mesa_bind_vertex_buffer(struct gl_context *ctx,
   else
  vao->VertexAttribBufferMask |= binding->_BoundArrays;
 
-  vao->NewArrays |= binding->_BoundArrays;
+  vao->NewArrays |= vao->_Enabled & binding->_BoundArrays;
}
 }
 
@@ -208,7 +208,7 @@ vertex_binding_divisor(struct gl_context *ctx,
if (binding->InstanceDivisor != divisor) {
   FLUSH_VERTICES(ctx, _NEW_ARRAY);
   binding->InstanceDivisor = divisor;
-  vao->NewArrays |= binding->_BoundArrays;
+  vao->NewArrays |= vao->_Enabled & binding->_BoundArrays;
}
 }
 
@@ -318,7 +318,7 @@ _mesa_update_array_format(struct gl_context *ctx,
array->RelativeOffset = relativeOffset;
array->_ElementSize = elementSize;
 
-   vao->NewArrays |= VERT_BIT(attrib);
+   vao->NewArrays |= vao->_Enabled & VERT_BIT(attrib);
ctx->NewState |= _NEW_ARRAY;
 }
 
diff --git a/src/mesa/main/varray.h b/src/mesa/main/varray.h
index fe7eb81631..ede7a004e4 100644
--- a/src/mesa/main/varray.h
+++ b/src/mesa/main/varray.h
@@ -58,17 +58,24 @@ _mesa_update_vertex_array(struct gl_context *ctx,
   const struct gl_array_attributes *attribs,
   const struct gl_vertex_buffer_binding *binding)
 {
-   dst->Size = attribs->Size;
-   dst->Type = attribs->Type;
-   dst->Format = attribs->Format;
-   dst->StrideB = binding->Stride;
-   dst->Ptr = _mesa_vertex_attrib_address(attribs, binding);
-   dst->Normalized = attribs->Normalized;
-   dst->Integer = attribs->Integer;
-   dst->Doubles = attribs->Doubles;
-   dst->InstanceDivisor = binding->InstanceDivisor;
-   dst->_ElementSize = attribs->_ElementSize;
-   _mesa_reference_buffer_object(ctx, &dst->BufferObj, binding->BufferObj);
+   if (attribs->Enabled) {
+  dst->Size = attribs->Size;
+  dst->Type = attribs->Type;
+  dst->Format = attribs->Format;
+  dst->StrideB = binding->Stride;
+  dst->Ptr = _mesa_vertex_attrib_address(attribs, binding);
+  dst->Normalized = attribs->Normalized;
+  dst->Integer = attribs->Integer;
+  dst->Doubles = attribs->Doubles;
+  dst->InstanceDivisor = binding->InstanceDivisor;
+  dst->_ElementSize = attribs->_ElementSize;
+  _mesa_reference_buffer_object(ctx, &dst->BufferObj, binding->BufferObj);
+   } else {
+  /* Disabled arrays shall not be consumed */
+  dst->Size = 0;
+  dst->Ptr = NULL;
+  _mesa_reference_buffer_object(ctx, &dst->BufferObj, NULL);
+   }
 }
 
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/3] Fix and tweak to the VAO v2

2018-02-05 Thread Mathias . Froehlich
From: Mathias Fröhlich 

Hi Brian,

Actually after incorporating your review requests to set
gl_vertex_array::Size and gl_vertex_array::Ptr to zero, radeonsi
started to assert in Bitmap/CopyPixels/DrawPixels.

So, here the updated series including the requested changes.
And additionally for review the change to fix the mentioned asserts
in several piglit tests.

Please review!

best

Mathias


Mathias Fröhlich (3):
  mesa: Fix VAO buffer object tracking.
  mesa: Mute arrays for Bitmap/CopyPixels/DrawPixels callbacks.
  mesa: Only update enabled VAO gl_vertex_array entries.

 src/mesa/main/drawpix.c | 10 ++
 src/mesa/main/varray.c  | 10 ++
 src/mesa/main/varray.h  | 29 ++---
 3 files changed, 34 insertions(+), 15 deletions(-)

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Haiku: convert to autotools

2018-02-05 Thread kallisti5

On 2018-02-05 15:39, Dylan Baker wrote:

Quoting kallisti5 (2018-02-05 12:58:30)

On 2017-10-24 11:47, Emil Velikov wrote:
> Hi Jerome,
>
> On 23 October 2017 at 16:58, Jerome Duval 
> wrote:
>> * configure.ac:
>>   -pthread is not available on Haiku.
>>   Haiku doesn't require --enable-dri
>>   build hgl on Haiku
>> * egl/Makefile.am: define backendfiles for Haiku
>> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and
>> targets/haiku-softpipe on Haiku.
>> * src/gallium/targets/haiku-softpipe: add Makefile.am
>> * src/gallium/state_trackers/hgl: add Makefile.am
>> * winsys/sw/hgl: add Makefile.am
>> * src/hgl/Makefile.am: add Makefile.am
>> ---
> Thanks for the patch. I think Eric has a point regarding splitting this
> up.
> Here is one way to handle it:
>  - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku
>  - 2 - src/egl
>  - 3 - src/hgl
>  - 4 misc fixes (the SoftwareRenderer.cpp hunk?)
>  - 5 toggle - configure.ac + src/Makefile.am

Hm, it looks like Jerome never got back to work on these changes... 
let

me try to
pick up the ball and run with it.

> Couple of small suggestions:
>  - keep all the sources and headers in the sources lists in
> Makefile.sources
>  - how do you guys manage pthreads - please mention that in the commit
> message.
>
> If I'm reading this correctly, you strip out -pthread and there's no
> pthread-stubs on Haiku.

Haiku (and BeOS for that matter) has pthread support built into its 
core

libroot.so.

No need for -lpthread, all applications can assume its presence. 
Things

that link -lpthread actually fail due to a non-existent libpthread...
*however* as i'm typing this i'm being told we recently implemented a
dummy static libpthread.a to try and appease assumptions about 
-lpthread

existence so i'll remove the pthread checks :-)

  -- Alex


Hi Alex,

I have a branch for building haiku with meson, when I was trying to 
compile
neither the scons build nor the autotools build seemed to compile on a 
Haiku VM
instance (x86_64), that was a few months ago though, so maybe it's 
fixed.


Our plan is to remove autotools from mesa, probably this year. I'm 
thinking if
things look pretty good through the 18.0 release cycle I'll probably 
propose

marking autotools as deprecated for 18.1 and propose removal in 18.2.


Ah. crap.  I just got autoconfig working :-).  Historically I have only 
used
SCons for our builds.  I always preferred the SCons build since 
autotools always

ends up looking like spaghetti.  Here is what our current build does:

https://github.com/haikuports/haikuports/blob/master/sys-libs/mesa/mesa-17.1.4.recipe#L52

It looks like Jerome hacked in a patch for autotools... but i've heard 
some reports

of instability with the resulting artifacts.

I'm not going to block you guys using autotools or NAK anything, I just 
want

you to be aware that we're trying to consolidate down to just meson and
android.mk files. I can respin the haiku patches and CC you if you're 
interested in

looking at them.


If Meson is the future, i'm definitely down helping (or even taking 
over) that branch

if it is just incomplete Haiku work.

I'm going to try and do better maintenance on Haiku Mesa in 2018. I've 
been only around

minimally in 2017 and am a little out of date.

You might also want to see if you guys can update your meson, at least 
last time
I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that 
-pthread

and -lpthread are never added by meson.


I just installed meson on Haiku and we are currently at 0.43.0

 -- Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir: remove emission of nir_op_fdiv

2018-02-05 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Mon, Feb 5, 2018 at 9:37 PM, Samuel Pitoiset
 wrote:
> RadeonSI and RADV lower fdiv.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 5 -
>  1 file changed, 5 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index b211832b47..e25bae600f 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1760,11 +1760,6 @@ static void visit_alu(struct ac_nir_context *ctx, 
> const nir_alu_instr *instr)
> src[1] = ac_to_float(&ctx->ac, src[1]);
> result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
> break;
> -   case nir_op_fdiv:
> -   src[0] = ac_to_float(&ctx->ac, src[0]);
> -   src[1] = ac_to_float(&ctx->ac, src[1]);
> -   result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
> -   break;
> case nir_op_frcp:
> src[0] = ac_to_float(&ctx->ac, src[0]);
> result = ac_build_fdiv(&ctx->ac, 
> instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
> --
> 2.16.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] meson: ensure xmlpool/options.h is generated for libgallium

2018-02-05 Thread Jon Turney
In file included from ../src/gallium/targets/dri/target.c:1:
In file included from ../src/gallium/auxiliary/target-helpers/drm_helper.h:8:
../src/util/xmlpool.h:103:10: fatal error: 'xmlpool/options.h' file not found

See also 26bde1e3.

Signed-off-by: Jon Turney 
---
 src/gallium/targets/dri/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/targets/dri/meson.build 
b/src/gallium/targets/dri/meson.build
index 30368c2152..75ce94ab2c 100644
--- a/src/gallium/targets/dri/meson.build
+++ b/src/gallium/targets/dri/meson.build
@@ -51,7 +51,7 @@ endif
 
 libgallium_dri = shared_library(
   'gallium_dri',
-  files('target.c'),
+  [files('target.c'), xmlpool_options_h],
   include_directories : [
 inc_common, inc_util, inc_dri_common, inc_gallium_drivers,
 inc_gallium_winsys, include_directories('../../state_trackers/dri'),
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] util: remove redundant check for the __clang__ macro

2018-02-05 Thread Vlad Golovkin
In this file there are similar cases with macros PUBLIC, USED and
ATTRIBUTE_NOINLINE, before defining which as __attribute__(...), code
only checks for __GNUC__.
Should I add comments there as well?

2018-02-05 22:51 GMT+02:00 Brian Paul :
> On 02/05/2018 01:44 PM, Vlad Golovkin wrote:
>>
>> Clang defines __GNUC__ macro, so one doesn't need to check __clang__
>> macro in this particular case.
>
>
> Perhaps mention that in a comment below so there's no confusion.
>
> -Brian
>
>
>> ---
>>   src/util/macros.h | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/util/macros.h b/src/util/macros.h
>> index 432d513930..d36ca095d5 100644
>> --- a/src/util/macros.h
>> +++ b/src/util/macros.h
>> @@ -138,7 +138,7 @@ do {   \
>> /* Forced function inlining */
>>   #ifndef ALWAYS_INLINE
>> -#  if defined(__GNUC__) || defined(__clang__)
>> +#  if defined(__GNUC__)
>>   #define ALWAYS_INLINE inline __attribute__((always_inline))
>>   #  elif defined(_MSC_VER)
>>   #define ALWAYS_INLINE __forceinline
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Haiku: convert to autotools

2018-02-05 Thread Dylan Baker
Quoting kallisti5 (2018-02-05 12:58:30)
> On 2017-10-24 11:47, Emil Velikov wrote:
> > Hi Jerome,
> > 
> > On 23 October 2017 at 16:58, Jerome Duval  
> > wrote:
> >> * configure.ac:
> >>   -pthread is not available on Haiku.
> >>   Haiku doesn't require --enable-dri
> >>   build hgl on Haiku
> >> * egl/Makefile.am: define backendfiles for Haiku
> >> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and
> >> targets/haiku-softpipe on Haiku.
> >> * src/gallium/targets/haiku-softpipe: add Makefile.am
> >> * src/gallium/state_trackers/hgl: add Makefile.am
> >> * winsys/sw/hgl: add Makefile.am
> >> * src/hgl/Makefile.am: add Makefile.am
> >> ---
> > Thanks for the patch. I think Eric has a point regarding splitting this 
> > up.
> > Here is one way to handle it:
> >  - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku
> >  - 2 - src/egl
> >  - 3 - src/hgl
> >  - 4 misc fixes (the SoftwareRenderer.cpp hunk?)
> >  - 5 toggle - configure.ac + src/Makefile.am
> 
> Hm, it looks like Jerome never got back to work on these changes... let 
> me try to
> pick up the ball and run with it.
> 
> > Couple of small suggestions:
> >  - keep all the sources and headers in the sources lists in 
> > Makefile.sources
> >  - how do you guys manage pthreads - please mention that in the commit 
> > message.
> > 
> > If I'm reading this correctly, you strip out -pthread and there's no
> > pthread-stubs on Haiku.
> 
> Haiku (and BeOS for that matter) has pthread support built into its core 
> libroot.so.
> 
> No need for -lpthread, all applications can assume its presence. Things 
> that link -lpthread actually fail due to a non-existent libpthread...  
> *however* as i'm typing this i'm being told we recently implemented a 
> dummy static libpthread.a to try and appease assumptions about -lpthread 
> existence so i'll remove the pthread checks :-)
> 
>   -- Alex

Hi Alex,

I have a branch for building haiku with meson, when I was trying to compile
neither the scons build nor the autotools build seemed to compile on a Haiku VM
instance (x86_64), that was a few months ago though, so maybe it's fixed.

Our plan is to remove autotools from mesa, probably this year. I'm thinking if
things look pretty good through the 18.0 release cycle I'll probably propose
marking autotools as deprecated for 18.1 and propose removal in 18.2. I'm not
going to block you guys using autotools or NAK anything, I just want you to be
aware that we're trying to consolidate down to just meson and android.mk files.
I can respin the haiku patches and CC you if you're interested in looking at
them.

You might also want to see if you guys can update your meson, at least last time
I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that -pthread
and -lpthread are never added by meson.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir: remove emission of nir_op_fdiv

2018-02-05 Thread Timothy Arceri

Reviewed-by: Timothy Arceri 

On 06/02/18 07:37, Samuel Pitoiset wrote:

RadeonSI and RADV lower fdiv.

Signed-off-by: Samuel Pitoiset 
---
  src/amd/common/ac_nir_to_llvm.c | 5 -
  1 file changed, 5 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b211832b47..e25bae600f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1760,11 +1760,6 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
src[1] = ac_to_float(&ctx->ac, src[1]);
result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
break;
-   case nir_op_fdiv:
-   src[0] = ac_to_float(&ctx->ac, src[0]);
-   src[1] = ac_to_float(&ctx->ac, src[1]);
-   result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
-   break;
case nir_op_frcp:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 
? ctx->ac.f32_1 : ctx->ac.f64_1,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/8] drivers/radeonsi:create uvd hevc enc entry

2018-02-05 Thread James Zhu
Add UVD hevc encode pipe video codec creation entry

Signed-off-by: James Zhu 
---
 src/gallium/drivers/radeonsi/si_uvd.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_uvd.c 
b/src/gallium/drivers/radeonsi/si_uvd.c
index 64f2f8e..0dea60d 100644
--- a/src/gallium/drivers/radeonsi/si_uvd.c
+++ b/src/gallium/drivers/radeonsi/si_uvd.c
@@ -31,6 +31,8 @@
 #include "radeon/radeon_vce.h"
 #include "radeon/radeon_vcn_dec.h"
 #include "radeon/radeon_vcn_enc.h"
+#include "radeon/radeon_uvd_enc.h"
+#include "util/u_video.h"
 
 /**
  * creates an video buffer with an UVD compatible memory layout
@@ -146,9 +148,16 @@ struct pipe_video_codec *si_uvd_create_decoder(struct 
pipe_context *context,
struct si_context *ctx = (struct si_context *)context;
bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false;
 
-   if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
-   return (vcn) ? radeon_create_encoder(context, templ, ctx->b.ws, 
si_vce_get_buffer) :
-   si_vce_create_encoder(context, templ, ctx->b.ws, 
si_vce_get_buffer);
+   if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
+   if (vcn) {
+   radeon_create_encoder(context, templ, ctx->b.ws, 
si_vce_get_buffer);
+   } else {
+   if (u_reduce_video_profile(templ->profile) == 
PIPE_VIDEO_FORMAT_HEVC)
+   radeon_uvd_create_encoder(context, templ, 
ctx->b.ws, si_vce_get_buffer);
+   else
+   si_vce_create_encoder(context, templ, 
ctx->b.ws, si_vce_get_buffer);
+   }
+   }
 
return (vcn) ?  radeon_create_decoder(context, templ) :
si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/8] amdgpu/drm:add uvd hevc enc support in amdgpu cs

2018-02-05 Thread Boyuan Zhang



On 2018-02-05 12:16 PM, James Zhu wrote:

Signed-off-by: James Zhu 
---
  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 1927a3a..6f305b7 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -376,6 +376,7 @@ static bool amdgpu_cs_has_user_fence(struct 
amdgpu_cs_context *cs)
  {
 return cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD &&
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE &&
+  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC &&
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC &&
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC;
  }
@@ -818,6 +819,10 @@ static bool amdgpu_init_cs_context(struct 
amdgpu_cs_context *cs,
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD;
break;
  
+   case RING_UVD_ENC:

+  cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC;
+  break;
+


Please follow previous indentation, use space instead of tab here.

Also, the patch name might better be changed to winsys/amdgpu.

With those fixed, this patch is
Reviewed-by: Boyuan Zhang 

Thanks,
Boyuan



 case RING_VCE:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE;
break;
@@ -1533,6 +1538,7 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
break;
 case RING_UVD:
+   case RING_UVD_ENC:
while (rcs->current.cdw & 15)
   radeon_emit(rcs, 0x8000); /* type2 nop packet */
break;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Haiku: convert to autotools

2018-02-05 Thread kallisti5

On 2017-10-24 11:47, Emil Velikov wrote:

Hi Jerome,

On 23 October 2017 at 16:58, Jerome Duval  
wrote:

* configure.ac:
  -pthread is not available on Haiku.
  Haiku doesn't require --enable-dri
  build hgl on Haiku
* egl/Makefile.am: define backendfiles for Haiku
* src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and
targets/haiku-softpipe on Haiku.
* src/gallium/targets/haiku-softpipe: add Makefile.am
* src/gallium/state_trackers/hgl: add Makefile.am
* winsys/sw/hgl: add Makefile.am
* src/hgl/Makefile.am: add Makefile.am
---
Thanks for the patch. I think Eric has a point regarding splitting this 
up.

Here is one way to handle it:
 - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku
 - 2 - src/egl
 - 3 - src/hgl
 - 4 misc fixes (the SoftwareRenderer.cpp hunk?)
 - 5 toggle - configure.ac + src/Makefile.am


Hm, it looks like Jerome never got back to work on these changes... let 
me try to

pick up the ball and run with it.


Couple of small suggestions:
 - keep all the sources and headers in the sources lists in 
Makefile.sources
 - how do you guys manage pthreads - please mention that in the commit 
message.


If I'm reading this correctly, you strip out -pthread and there's no
pthread-stubs on Haiku.


Haiku (and BeOS for that matter) has pthread support built into its core 
libroot.so.


No need for -lpthread, all applications can assume its presence. Things 
that link -lpthread actually fail due to a non-existent libpthread...  
*however* as i'm typing this i'm being told we recently implemented a 
dummy static libpthread.a to try and appease assumptions about -lpthread 
existence so i'll remove the pthread checks :-)


 -- Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 0/6] Implement commont gralloc_handle_t in libdrm

2018-02-05 Thread Robert Foss

Hey,

I haven't seen any feedback, but am looking for an ACK/LGTM.
If anyone has a cycle to spare, it would be helpful :)


Thanks,
Rob.

On 01/29/2018 06:37 PM, Robert Foss wrote:

This series moves {gbm,drm,cros}_gralloc_handle_t struct to libdrm,
since at least 4 implementations exist, and share a lot of contents.
The idea is to keep the common stuff defined in one place, and libdrm
is the common codebase to all of these platforms.

Additionally, having this struct defined in libdrm will make it
easier for mesa and gralloc implementations to communicate.

A second series is expected to be submitted, which will contain an accessor
function implementation that would allow each gralloc
implementation to supply its own accessors.

Robert Foss (6):
   android: Move gralloc handle struct to libdrm
   android: Add version variable to gralloc_handle_t
   android: Mark gralloc_handle_t magic variable as const
   android: Remove member name from gralloc_handle_t
   android: Change gralloc_handle_t format from Android format to fourcc
   android: Change gralloc_handle_t members to be fixed width

  Android.mk   |   8 +++-
  Makefile.sources |   3 ++
  android/gralloc_handle.h | 109 +++
  3 files changed, 118 insertions(+), 2 deletions(-)
  create mode 100644 android/gralloc_handle.h


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] amd/common:add uvd hevc enc support check in hw query

2018-02-05 Thread Boyuan Zhang



On 2018-02-05 12:16 PM, James Zhu wrote:

Based on amdgpu hardware query information to check if UVD hevc enc support

Signed-off-by: James Zhu 
---
  src/amd/common/ac_gpu_info.c | 10 +-
  src/amd/common/ac_gpu_info.h |  1 +
  2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 6d9dcb5..2494967 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -98,7 +98,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
  {
struct amdgpu_buffer_size_alignments alignment_info = {};
struct amdgpu_heap_info vram, vram_vis, gtt;
-   struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = 
{}, vcn_dec = {}, vcn_enc = {};
+   struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, uvd_enc 
= {}, vce = {}, vcn_dec = {}, vcn_enc = {};
uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature 
= 0;
int r, i, j;
drmDevicePtr devinfo;
@@ -166,6 +166,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
return false;
}
  
+	r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, &uvd_enc);

+   if (r) {
+   fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) 
failed.\n");
+   return false;
+   }
+
if (info->drm_major == 3 && info->drm_minor >= 17) {
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, 
&vcn_dec);
if (r) {
@@ -275,6 +281,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
uvd.available_rings ? uvd_version : 0;
info->vce_fw_version =
vce.available_rings ? vce_version : 0;
+   info->uvd_enc_supported =
+   uvd_enc.available_rings ? true : false;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor 
>= 20;
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index cca3e98..6b120d1 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -65,6 +65,7 @@ struct radeon_info {
uint32_tnum_compute_rings;
uint32_tuvd_fw_version;
uint32_tvce_fw_version;
+   booluvd_enc_supported;


White space/tab length seems not correct here. With that fixed, this 
patch is

Reviewed-by: Boyuan Zhang 

Thanks,
Boyuan


uint32_tme_fw_version;
uint32_tme_fw_feature;
uint32_tpfp_fw_version;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] util: remove redundant check for the __clang__ macro

2018-02-05 Thread Brian Paul

On 02/05/2018 01:44 PM, Vlad Golovkin wrote:

Clang defines __GNUC__ macro, so one doesn't need to check __clang__
macro in this particular case.


Perhaps mention that in a comment below so there's no confusion.

-Brian


---
  src/util/macros.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/util/macros.h b/src/util/macros.h
index 432d513930..d36ca095d5 100644
--- a/src/util/macros.h
+++ b/src/util/macros.h
@@ -138,7 +138,7 @@ do {   \
  
  /* Forced function inlining */

  #ifndef ALWAYS_INLINE
-#  if defined(__GNUC__) || defined(__clang__)
+#  if defined(__GNUC__)
  #define ALWAYS_INLINE inline __attribute__((always_inline))
  #  elif defined(_MSC_VER)
  #define ALWAYS_INLINE __forceinline



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] util: remove redundant check for the __clang__ macro

2018-02-05 Thread Vlad Golovkin
Clang defines __GNUC__ macro, so one doesn't need to check __clang__
macro in this particular case.
---
 src/util/macros.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/util/macros.h b/src/util/macros.h
index 432d513930..d36ca095d5 100644
--- a/src/util/macros.h
+++ b/src/util/macros.h
@@ -138,7 +138,7 @@ do {   \
 
 /* Forced function inlining */
 #ifndef ALWAYS_INLINE
-#  if defined(__GNUC__) || defined(__clang__)
+#  if defined(__GNUC__)
 #define ALWAYS_INLINE inline __attribute__((always_inline))
 #  elif defined(_MSC_VER)
 #define ALWAYS_INLINE __forceinline
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ac/nir: remove emission of nir_op_fdiv

2018-02-05 Thread Samuel Pitoiset
RadeonSI and RADV lower fdiv.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b211832b47..e25bae600f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1760,11 +1760,6 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
src[1] = ac_to_float(&ctx->ac, src[1]);
result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
break;
-   case nir_op_fdiv:
-   src[0] = ac_to_float(&ctx->ac, src[0]);
-   src[1] = ac_to_float(&ctx->ac, src[1]);
-   result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
-   break;
case nir_op_frcp:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size 
== 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/8] drivers/radeonsi:create uvd hevc enc entry

2018-02-05 Thread Boyuan Zhang



On 2018-02-05 12:16 PM, James Zhu wrote:

Add UVD hevc encode pipe video codec creation entry

Signed-off-by: James Zhu 
---
  src/gallium/drivers/radeonsi/si_uvd.c | 13 +
  1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_uvd.c 
b/src/gallium/drivers/radeonsi/si_uvd.c
index 64f2f8e..fa43a96 100644
--- a/src/gallium/drivers/radeonsi/si_uvd.c
+++ b/src/gallium/drivers/radeonsi/si_uvd.c
@@ -31,7 +31,8 @@
  #include "radeon/radeon_vce.h"
  #include "radeon/radeon_vcn_dec.h"
  #include "radeon/radeon_vcn_enc.h"
-
+#include "radeon/radeon_uvd_enc.h"
+#include "util/u_video.h"

Could you add back the blank line please?

  /**
   * creates an video buffer with an UVD compatible memory layout
   */
@@ -146,9 +147,13 @@ struct pipe_video_codec *si_uvd_create_decoder(struct 
pipe_context *context,
struct si_context *ctx = (struct si_context *)context;
bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false;
  
-	if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)

-   return (vcn) ? radeon_create_encoder(context, templ, ctx->b.ws, 
si_vce_get_buffer) :
-   si_vce_create_encoder(context, templ, ctx->b.ws, 
si_vce_get_buffer);
+   if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
+   if (u_reduce_video_profile(templ->profile) == 
PIPE_VIDEO_FORMAT_HEVC) {
+   return (vcn) ? radeon_create_encoder(context, templ, 
ctx->b.ws, si_vce_get_buffer) :
+   
radeon_uvd_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+   } else
+   return si_vce_create_encoder(context, templ, ctx->b.ws, 
si_vce_get_buffer);
+   }
It seems that this change will break the original logic for vcn h.264 
encode case, please fix it.


Thanks,
Boyuan

  
  	return (vcn) ? 	radeon_create_decoder(context, templ) :

si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/8] drivers/radeon:add uvd hevc enc functions

2018-02-05 Thread James Zhu
Implement UVD hevc encode functions

Signed-off-by: James Zhu 
---
 src/gallium/drivers/radeon/radeon_uvd_enc.c | 340 
 1 file changed, 340 insertions(+)
 create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c 
b/src/gallium/drivers/radeon/radeon_uvd_enc.c
new file mode 100644
index 000..6eb6cda
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -0,0 +1,340 @@
+/**
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#include 
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, struct 
pipe_h265_enc_picture_desc *picture)
+{
+  struct pipe_h265_enc_picture_desc *pic = (struct 
pipe_h265_enc_picture_desc *)picture;
+  enc->enc_pic.picture_type = pic->picture_type;
+  enc->enc_pic.frame_num = pic->frame_num;
+  enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+  enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+  enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
+  enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
+  enc->enc_pic.not_referenced = pic->not_referenced;
+  enc->enc_pic.is_idr = (pic->picture_type == 
PIPE_H265_ENC_PICTURE_TYPE_IDR) ||
+(pic->picture_type == 
PIPE_H265_ENC_PICTURE_TYPE_I);
+  enc->enc_pic.crop_left = 0;
+  enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) 
/ 2;
+  enc->enc_pic.crop_top = 0;
+  enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - 
enc->base.height) / 2;
+  enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
+  enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
+  enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
+  enc->enc_pic.max_poc = pic->seq.intra_period;
+  enc->enc_pic.log2_max_poc = 0;
+  for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++)
+ i = (i >> 1);
+  enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
+  enc->enc_pic.pic_width_in_luma_samples = 
pic->seq.pic_width_in_luma_samples;
+  enc->enc_pic.pic_height_in_luma_samples = 
pic->seq.pic_height_in_luma_samples;
+  enc->enc_pic.log2_diff_max_min_luma_coding_block_size = 
pic->seq.log2_diff_max_min_luma_coding_block_size;
+  enc->enc_pic.log2_min_transform_block_size_minus2 = 
pic->seq.log2_min_transform_block_size_minus2;
+  enc->enc_pic.log2_diff_max_min_transform_block_size = 
pic->seq.log2_diff_max_min_transform_block_size;
+  enc->enc_pic.max_transform_hierarchy_depth_inter = 
pic->seq.max_transform_hierarchy_depth_inter;
+  enc->enc_pic.max_transform_hierarchy_depth_intra = 
pic->seq.max_transform_hierarchy_depth_intra;
+  enc->enc_pic.log2_parallel_merge_level_minus2 = 
pic->pic.log2_parallel_merge_level_minus2;
+  enc->enc_pic.bit_depth_luma_minus8 = pic->seq.bit_depth_luma_minus8;
+  enc->enc_pic.bit_depth_chroma_minus8 = pic->seq.bit_depth_chroma_minus8;
+  enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type;
+  enc->enc_pic.max_num_merge_cand = pic->slice.max_num_merge_cand;
+  enc->enc_pic.sample_adaptive_offset_enabled_flag = 
pic->seq.sample_adaptive_offset_enabled_flag;
+  enc->enc_pic.pcm_enabled_flag = pic->seq.pcm_enabled_flag;
+  enc->enc_pic.sps_temporal_mvp_enabled_flag = 
pic->seq.sps_temporal_mvp_enabled_flag;
+}
+
+static void flush(struct radeon_uvd_encoder 

Re: [Mesa-dev] [PATCH] meson: better defaults for osx, windows and cygwin

2018-02-05 Thread Jon Turney

On 05/02/2018 17:34, Dylan Baker wrote:

Quoting Jon Turney (2018-02-03 13:19:20)

On 03/02/2018 18:07, Dylan Baker wrote:

Quoting Jon Turney (2018-02-03 05:49:40)

-  if not ['darwin', 'windows'].contains(host_machine.system())
+  if not ['darwin', 'windows', 'cygwin'].contains(host_machine.system())
+# TODO: PPC, Sparc
   if ['x86', 'x86_64'].contains(host_machine.cpu_family())
 _drivers = 'i915,i965,r100,r200,nouveau'
   else
 error('Unknown architecture. Please pass -Ddri-drivers to set driver 
options. Patches gladly accepted to fix this.')
   endif
 else
-error('Unknown OS. Please pass -Ddri-drivers to set driver options. 
Patches gladly accepted to fix this.')
+# only swrast would make sense here, but gallium swrast is a much better 
default
+_drivers = ''


I'm really not a fan of dumping the 'else error' case. This currently means that
for example haiku will try to build something that they cannot support. I'd
really rather just set appropriate defaults for OSes that are guaranteed
supported and still let OSes that haven't been tested fall through to error. I
also think that's a nice place for people trying to use mesa meson on a new
platform, since they understand we haven't tested on their OS.


Good idea.  But that's not what the code currently does.  If it's not on
the list of 'unknown' OSes (darwin, windows), any other OS e.g. haiku
gets treated like linux...

Attached is a revised patch which is more explicit about what's a known
OS.  I guess the BSDs probably should be added somewhere, but idk what's
appropriate for them.


I rather like this patch, so you can add:
Reviewed-by: Dylan Baker 


Thanks.


The only thing I might do differently is instead of checking for Linux use the
`system_has_kms_drm` variable (which covers the BSDs as well as Linux), but I'm
okay with landing this as-is and changing that later if that is the right thing
to do.


Yeah, that seems pretty plausible.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/8] drivers/radeon:add uvd hevc enc functions

2018-02-05 Thread James Zhu



On 2018-02-05 01:04 PM, Alex Deucher wrote:

On Mon, Feb 5, 2018 at 12:16 PM, James Zhu  wrote:

Implement UVD hevc encode functions

Signed-off-by: James Zhu 
---
  1   |  21 ++
  src/gallium/drivers/radeon/radeon_uvd_enc.c | 340 
  2 files changed, 361 insertions(+)
  create mode 100644 1
  create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c

diff --git a/1 b/1
new file mode 100644
index 000..51dd09e
--- /dev/null
+++ b/1
@@ -0,0 +1,21 @@
+r c80294d drivers/radeon:Add uvd hevc enc hw interface header
+pick 2d924d5 drivers/radeon:add uvd hevc enc hw ib implementation
+
+# Rebase f2b9031..2d924d5 onto f2b9031 (2 command(s))
+#
+# Commands:
+# p, pick = use commit
+# r, reword = use commit, but edit the commit message
+# e, edit = use commit, but stop for amending
+# s, squash = use commit, but meld into previous commit
+# f, fixup = like "squash", but discard this commit's log message
+# x, exec = run command (the rest of the line) using shell
+# d, drop = remove commit
+#
+# These lines can be re-ordered; they are executed from top to bottom.
+#
+# If you remove a line here THAT COMMIT WILL BE LOST.
+#
+# However, if you remove everything, the rebase will be aborted.
+#
+# Note that empty commits are commented out

Looks like some garbage got accidentally added here.

Removed the garbage file. Sending out version 2 of the patches.

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c 
b/src/gallium/drivers/radeon/radeon_uvd_enc.c
new file mode 100644
index 000..6eb6cda
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -0,0 +1,340 @@
+/**
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#include 
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, struct 
pipe_h265_enc_picture_desc *picture)
+{
+  struct pipe_h265_enc_picture_desc *pic = (struct 
pipe_h265_enc_picture_desc *)picture;
+  enc->enc_pic.picture_type = pic->picture_type;
+  enc->enc_pic.frame_num = pic->frame_num;
+  enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+  enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+  enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
+  enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
+  enc->enc_pic.not_referenced = pic->not_referenced;
+  enc->enc_pic.is_idr = (pic->picture_type == 
PIPE_H265_ENC_PICTURE_TYPE_IDR) ||
+(pic->picture_type == 
PIPE_H265_ENC_PICTURE_TYPE_I);
+  enc->enc_pic.crop_left = 0;
+  enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) 
/ 2;
+  enc->enc_pic.crop_top = 0;
+  enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - 
enc->base.height) / 2;
+  enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
+  enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
+  enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
+  enc->enc_pic.max_poc = pic->seq.intra_period;
+  enc->enc_pic.log2_max_poc = 0;
+  for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++)
+ i = (i >> 1);
+  enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
+  enc->enc_pic.pic_width_in_luma_samples = 
pic->seq.pic_width_in_luma_samples;
+  enc->enc_pic.pic_height_in_luma_samples = 
pic->seq.pic_height_in_luma_samples;
+  

Re: [Mesa-dev] [PATCH] i965: Enable disk shader cache by default

2018-02-05 Thread Jason Ekstrand
On Sat, Feb 3, 2018 at 2:58 PM, Jordan Justen 
wrote:

> On 2018-02-03 14:24:06, Jason Ekstrand wrote:
> > On February 3, 2018 13:59:40 Jordan Justen 
> wrote:
> >
> > > Signed-off-by: Jordan Justen 
> > > Reviewed-by: Timothy Arceri 
> > > ---
> > >  docs/relnotes/18.1.0.html  | 1 +
> > >  src/mesa/drivers/dri/i965/brw_disk_cache.c | 3 ---
> > >  2 files changed, 1 insertion(+), 3 deletions(-)
> > >
> > > diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
> > > index b8a0cd0d02c..0a5878ea41f 100644
> > > --- a/docs/relnotes/18.1.0.html
> > > +++ b/docs/relnotes/18.1.0.html
> > > @@ -46,6 +46,7 @@ Note: some of the new features are only available
> with
> > > certain drivers.
> > >  
> > >  GL_EXT_semaphore on radeonsi
> > >  GL_EXT_semaphore_fd on radeonsi
> > > +Disk shader cache support for i965 enabled by default
> > >  
> > >
> > >  Bug fixes
> > > diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c
> > > b/src/mesa/drivers/dri/i965/brw_disk_cache.c
> > > index f989456bcde..41f742e858f 100644
> > > --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
> > > +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
> > > @@ -407,9 +407,6 @@ void
> > >  brw_disk_cache_init(struct intel_screen *screen)
> > >  {
> > >  #ifdef ENABLE_SHADER_CACHE
> > > -   if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", true))
> > > -  return;
> >
> > Should we just flip the default so we still have the environment variable
> > to shut it off if we have problems?
>
> The disk_cache_create function (called later) also looks at the same
> variable, and it defaults to enabling the shader cache.
>
> That's the reason I chose to use this variable name, even though it
> has meant that we had to use a double negative
> (MESA_GLSL_CACHE_DISABLE=0) to allow the i965 disk shader cache to be
> enabled.
>

Fair enough.  I figured it was something like that.  I just wanted to
double-check. :-)

--Jason



> -Jordan
>
> >
> > > -
> > > char renderer[10];
> > > MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer),
> "i965_%04x",
> > > screen->deviceID);
> > > --
> > > 2.15.1
> > >
> > > ___
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> >
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] i965/gen10: Use CS Stall instead of WriteImmediate.

2018-02-05 Thread Rafael Antognolli
The first 2 patches of this series should be added to branch 18.0 too.

On Fri, Jan 26, 2018 at 11:32:38AM -0800, Rafael Antognolli wrote:
> Fixes: ca19ee33d7d39cb89d948b1c983763065975ce5b
> Signed-off-by: Rafael Antognolli 
> Cc: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_pipe_control.c | 10 --
>  1 file changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
> b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> index eb8ada63129..e5b3ffe640c 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> @@ -353,12 +353,10 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
>  void
>  gen10_emit_isp_disable(struct brw_context *brw)
>  {
> -   const struct gen_device_info *devinfo = >screen->devinfo;
> -
> -   brw_emit_pipe_control_write(brw,
> -   PIPE_CONTROL_ISP_DIS |
> -   PIPE_CONTROL_WRITE_IMMEDIATE,
> -   brw->workaround_bo, 0, 0);
> +   brw_emit_pipe_control(brw,
> + PIPE_CONTROL_ISP_DIS |
> + PIPE_CONTROL_CS_STALL,
> + NULL, 0, 0);
>  
> brw->vs.base.push_constants_dirty = true;
> brw->tcs.base.push_constants_dirty = true;
> -- 
> 2.14.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: better defaults for osx, windows and cygwin

2018-02-05 Thread Dylan Baker
Quoting Jon Turney (2018-02-03 13:19:20)
> On 03/02/2018 18:07, Dylan Baker wrote:
> > Quoting Jon Turney (2018-02-03 05:49:40)
> >> -  if not ['darwin', 'windows'].contains(host_machine.system())
> >> +  if not ['darwin', 'windows', 'cygwin'].contains(host_machine.system())
> >> +# TODO: PPC, Sparc
> >>   if ['x86', 'x86_64'].contains(host_machine.cpu_family())
> >> _drivers = 'i915,i965,r100,r200,nouveau'
> >>   else
> >> error('Unknown architecture. Please pass -Ddri-drivers to set 
> >> driver options. Patches gladly accepted to fix this.')
> >>   endif
> >> else
> >> -error('Unknown OS. Please pass -Ddri-drivers to set driver options. 
> >> Patches gladly accepted to fix this.')
> >> +# only swrast would make sense here, but gallium swrast is a much 
> >> better default
> >> +_drivers = ''
> > 
> > I'm really not a fan of dumping the 'else error' case. This currently means 
> > that
> > for example haiku will try to build something that they cannot support. I'd
> > really rather just set appropriate defaults for OSes that are guaranteed
> > supported and still let OSes that haven't been tested fall through to 
> > error. I
> > also think that's a nice place for people trying to use mesa meson on a new
> > platform, since they understand we haven't tested on their OS.
> 
> Good idea.  But that's not what the code currently does.  If it's not on 
> the list of 'unknown' OSes (darwin, windows), any other OS e.g. haiku 
> gets treated like linux...
> 
> Attached is a revised patch which is more explicit about what's a known 
> OS.  I guess the BSDs probably should be added somewhere, but idk what's 
> appropriate for them.
> 
> 

I rather like this patch, so you can add:
Reviewed-by: Dylan Baker 

The only thing I might do differently is instead of checking for Linux use the
`system_has_kms_drm` variable (which covers the BSDs as well as Linux), but I'm
okay with landing this as-is and changing that later if that is the right thing
to do.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] i965/nir: do int64 lowering before optimization

2018-02-05 Thread Matt Turner
On Mon, Feb 5, 2018 at 5:40 AM, Iago Toral Quiroga  wrote:
> Otherwise loop unrolling will fail to see the actual cost of
> the unrolling operations when the loop body contains 64-bit integer
> instructions, and especially when the divmod64 lowering applies,
> since its lowering is quite expensive.
>
> Without this change, some in-development CTS tests for int64
> get stuck forever trying to register allocate a shader with
> over 50K SSA values. The large number of SSA values is the result
> of NIR first unrolling multiple seemingly simple loops that involve
> int64 instructions, only to then lower these instructions to produce
> a massive pile of code (due to the divmod64 lowering in the unrolled
> instructions).
>
> With this change, loop unrolling will see the loops with the int64
> code already lowered and will realize that it is too expensive to
> unroll.
>
> v2: Run nir_algebraic first so we can hopefully get rid of some of
> the int64 instructions before we even attempt to lower them.
> ---
>
> For reference, I captured execution times for the CTS tests that
> raised the problem. This is with debug builds of Mesa and CTS so
> they are not ideal, but I think they are sufficient to see the
> impact of the patch.
>
> With this patch: 52s
> With v1 of this patch: 56s
> With master: 1m:38s (*)
>
> (*) This is actually a significant improvement that has happened in
> master since we sent the original patch. Originally, the tests would
> just hang forever trying to compile.

Seems like a step in the right direction.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/8] drivers/radeon:add uvd hevc enc functions

2018-02-05 Thread Alex Deucher
On Mon, Feb 5, 2018 at 12:16 PM, James Zhu  wrote:
> Implement UVD hevc encode functions
>
> Signed-off-by: James Zhu 
> ---
>  1   |  21 ++
>  src/gallium/drivers/radeon/radeon_uvd_enc.c | 340 
> 
>  2 files changed, 361 insertions(+)
>  create mode 100644 1
>  create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c
>
> diff --git a/1 b/1
> new file mode 100644
> index 000..51dd09e
> --- /dev/null
> +++ b/1
> @@ -0,0 +1,21 @@
> +r c80294d drivers/radeon:Add uvd hevc enc hw interface header
> +pick 2d924d5 drivers/radeon:add uvd hevc enc hw ib implementation
> +
> +# Rebase f2b9031..2d924d5 onto f2b9031 (2 command(s))
> +#
> +# Commands:
> +# p, pick = use commit
> +# r, reword = use commit, but edit the commit message
> +# e, edit = use commit, but stop for amending
> +# s, squash = use commit, but meld into previous commit
> +# f, fixup = like "squash", but discard this commit's log message
> +# x, exec = run command (the rest of the line) using shell
> +# d, drop = remove commit
> +#
> +# These lines can be re-ordered; they are executed from top to bottom.
> +#
> +# If you remove a line here THAT COMMIT WILL BE LOST.
> +#
> +# However, if you remove everything, the rebase will be aborted.
> +#
> +# Note that empty commits are commented out

Looks like some garbage got accidentally added here.


> diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c 
> b/src/gallium/drivers/radeon/radeon_uvd_enc.c
> new file mode 100644
> index 000..6eb6cda
> --- /dev/null
> +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
> @@ -0,0 +1,340 @@
> +/**
> + *
> + * Copyright 2018 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
> + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + **/
> +
> +#include 
> +
> +#include "pipe/p_video_codec.h"
> +
> +#include "util/u_video.h"
> +#include "util/u_memory.h"
> +
> +#include "vl/vl_video_buffer.h"
> +
> +#include "radeonsi/si_pipe.h"
> +#include "radeon_video.h"
> +#include "radeon_uvd_enc.h"
> +
> +static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, struct 
> pipe_h265_enc_picture_desc *picture)
> +{
> +  struct pipe_h265_enc_picture_desc *pic = (struct 
> pipe_h265_enc_picture_desc *)picture;
> +  enc->enc_pic.picture_type = pic->picture_type;
> +  enc->enc_pic.frame_num = pic->frame_num;
> +  enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
> +  enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
> +  enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
> +  enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
> +  enc->enc_pic.not_referenced = pic->not_referenced;
> +  enc->enc_pic.is_idr = (pic->picture_type == 
> PIPE_H265_ENC_PICTURE_TYPE_IDR) ||
> +(pic->picture_type == 
> PIPE_H265_ENC_PICTURE_TYPE_I);
> +  enc->enc_pic.crop_left = 0;
> +  enc->enc_pic.crop_right = (align(enc->base.width, 16) - 
> enc->base.width) / 2;
> +  enc->enc_pic.crop_top = 0;
> +  enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - 
> enc->base.height) / 2;
> +  enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
> +  enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
> +  enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
> +  enc->enc_pic.max_poc = pic->seq.intra_period;
> +  enc->enc_pic.log2_max_poc = 0;
> +  for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++)
> + i = (i >> 1);
> +  enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
> +  enc->enc_pic.pic_width_in_luma_samples = 
> pic->seq.pic_width_in_luma_samples;
> + 

[Mesa-dev] [Bug 104949] swrast: Epiphany WEB browser core dumps under Mesa 17.3.3

2018-02-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104949

--- Comment #3 from Daniel Stone  ---
Yeah, good catch! Bug filed: https://bugs.webkit.org/show_bug.cgi?id=182490

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: rename gl_vertex_array_object::_VertexAttrib -> _VertexArray

2018-02-05 Thread Brian Paul
Since the type is gl_vertex_array.  Update comment to explain that
these arrays are only used by the VBO module.

Also rename some local variables in _mesa_update_vao_derived_arrays().
---
 src/mesa/main/arrayobj.c  | 13 ++---
 src/mesa/main/attrib.c|  2 +-
 src/mesa/main/mtypes.h|  4 ++--
 src/mesa/vbo/vbo_exec_array.c |  2 +-
 4 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index 360d097..a6fa33c 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -283,8 +283,8 @@ unbind_array_object_vbos(struct gl_context *ctx, struct 
gl_vertex_array_object *
for (i = 0; i < ARRAY_SIZE(obj->BufferBinding); i++)
   _mesa_reference_buffer_object(ctx, >BufferBinding[i].BufferObj, 
NULL);
 
-   for (i = 0; i < ARRAY_SIZE(obj->_VertexAttrib); i++)
-  _mesa_reference_buffer_object(ctx, >_VertexAttrib[i].BufferObj, 
NULL);
+   for (i = 0; i < ARRAY_SIZE(obj->_VertexArray); i++)
+  _mesa_reference_buffer_object(ctx, >_VertexArray[i].BufferObj, 
NULL);
 }
 
 
@@ -453,14 +453,13 @@ _mesa_update_vao_derived_arrays(struct gl_context *ctx,
 
while (arrays) {
   const int attrib = u_bit_scan();
-  struct gl_vertex_array *client_array = >_VertexAttrib[attrib];
-  const struct gl_array_attributes *attrib_array =
+  struct gl_vertex_array *array = >_VertexArray[attrib];
+  const struct gl_array_attributes *attribs =
  >VertexAttrib[attrib];
   const struct gl_vertex_buffer_binding *buffer_binding =
- >BufferBinding[attrib_array->BufferBindingIndex];
+ >BufferBinding[attribs->BufferBindingIndex];
 
-  _mesa_update_vertex_array(ctx, client_array, attrib_array,
-buffer_binding);
+  _mesa_update_vertex_array(ctx, array, attribs, buffer_binding);
}
 }
 
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index a9e4a11..8ac5db0 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -1503,7 +1503,7 @@ copy_array_object(struct gl_context *ctx,
/* skip RefCount */
 
for (i = 0; i < ARRAY_SIZE(src->VertexAttrib); i++) {
-  _mesa_copy_vertex_array(ctx, >_VertexAttrib[i], 
>_VertexAttrib[i]);
+  _mesa_copy_vertex_array(ctx, >_VertexArray[i], 
>_VertexArray[i]);
   _mesa_copy_vertex_attrib_array(ctx, >VertexAttrib[i], 
>VertexAttrib[i]);
   _mesa_copy_vertex_buffer_binding(ctx, >BufferBinding[i], 
>BufferBinding[i]);
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 3a67d43..aa083c3 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1603,9 +1603,9 @@ struct gl_vertex_array_object
 * Derived vertex attribute arrays
 *
 * This is a legacy data structure created from gl_array_attributes and
-* gl_vertex_buffer_binding, for compatibility with existing driver code.
+* gl_vertex_buffer_binding, only used by the VBO module at this time.
 */
-   struct gl_vertex_array _VertexAttrib[VERT_ATTRIB_MAX];
+   struct gl_vertex_array _VertexArray[VERT_ATTRIB_MAX];
 
/** Vertex attribute arrays */
struct gl_array_attributes VertexAttrib[VERT_ATTRIB_MAX];
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 42759d5..a5bedc8 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -314,7 +314,7 @@ recalculate_input_bindings(struct gl_context *ctx)
struct vbo_context *vbo = vbo_context(ctx);
struct vbo_exec_context *exec = >exec;
const struct gl_vertex_array_object *vao = ctx->Array.VAO;
-   const struct gl_vertex_array *vertexAttrib = vao->_VertexAttrib;
+   const struct gl_vertex_array *vertexAttrib = vao->_VertexArray;
const struct gl_vertex_array **inputs = >array.inputs[0];
 
/* May shuffle the position and generic0 bits around */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] st/mesa: refactor st_bufferobj_map_range()

2018-02-05 Thread Brian Paul
Use a new helper function, st_access_flags_to_transfer_flags(), to
convert the GL_MAP_x flags to PIPE_TRANSFER_x flags.

We'll be able to use this function in a couple other places.
---
 src/mesa/state_tracker/st_cb_bufferobjects.c | 38 
 src/mesa/state_tracker/st_cb_bufferobjects.h |  4 +++
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c 
b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 6b64ba1..044916b 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -427,17 +427,13 @@ st_bufferobj_invalidate(struct gl_context *ctx,
 
 
 /**
- * Called via glMapBufferRange().
+ * Convert GLbitfield of GL_MAP_x flags to gallium pipe_transfer_usage flags.
+ * \param wholeBuffer  is the whole buffer being mapped?
  */
-static void *
-st_bufferobj_map_range(struct gl_context *ctx,
-   GLintptr offset, GLsizeiptr length, GLbitfield access,
-   struct gl_buffer_object *obj,
-   gl_map_buffer_index index)
+enum pipe_transfer_usage
+st_access_flags_to_transfer_flags(GLbitfield access, bool wholeBuffer)
 {
-   struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_buffer_object *st_obj = st_buffer_object(obj);
-   enum pipe_transfer_usage flags = 0x0;
+   enum pipe_transfer_usage flags = 0;
 
if (access & GL_MAP_WRITE_BIT)
   flags |= PIPE_TRANSFER_WRITE;
@@ -452,7 +448,7 @@ st_bufferobj_map_range(struct gl_context *ctx,
   flags |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
}
else if (access & GL_MAP_INVALIDATE_RANGE_BIT) {
-  if (offset == 0 && length == obj->Size)
+  if (wholeBuffer)
  flags |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
   else
  flags |= PIPE_TRANSFER_DISCARD_RANGE;
@@ -473,15 +469,35 @@ st_bufferobj_map_range(struct gl_context *ctx,
if (access & MESA_MAP_NOWAIT_BIT)
   flags |= PIPE_TRANSFER_DONTBLOCK;
 
+   return flags;
+}
+
+
+/**
+ * Called via glMapBufferRange().
+ */
+static void *
+st_bufferobj_map_range(struct gl_context *ctx,
+   GLintptr offset, GLsizeiptr length, GLbitfield access,
+   struct gl_buffer_object *obj,
+   gl_map_buffer_index index)
+{
+   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct st_buffer_object *st_obj = st_buffer_object(obj);
+
assert(offset >= 0);
assert(length >= 0);
assert(offset < obj->Size);
assert(offset + length <= obj->Size);
 
+   const enum pipe_transfer_usage transfer_flags =
+  st_access_flags_to_transfer_flags(access,
+offset == 0 && length == obj->Size);
+
obj->Mappings[index].Pointer = pipe_buffer_map_range(pipe,
 st_obj->buffer,
 offset, length,
-flags,
+transfer_flags,
 
_obj->transfer[index]);
if (obj->Mappings[index].Pointer) {
   obj->Mappings[index].Offset = offset;
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.h 
b/src/mesa/state_tracker/st_cb_bufferobjects.h
index ea77c58..534506a 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.h
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.h
@@ -55,6 +55,10 @@ st_buffer_object(struct gl_buffer_object *obj)
 }
 
 
+enum pipe_transfer_usage
+st_access_flags_to_transfer_flags(GLbitfield access, bool wholeBuffer);
+
+
 extern void
 st_init_bufferobject_functions(struct pipe_screen *screen,
struct dd_function_table *functions);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] st/mesa: use st_access_flags_to_transfer_flags() helper in more places

2018-02-05 Thread Brian Paul
---
 src/mesa/state_tracker/st_cb_fbo.c | 18 +-
 src/mesa/state_tracker/st_cb_texture.c | 17 -
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_fbo.c 
b/src/mesa/state_tracker/st_cb_fbo.c
index 3a5c03c..0800f5b 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -47,6 +47,7 @@
 #include "pipe/p_screen.h"
 #include "st_atom.h"
 #include "st_context.h"
+#include "st_cb_bufferobjects.h"
 #include "st_cb_fbo.h"
 #include "st_cb_flush.h"
 #include "st_cb_texture.h"
@@ -780,7 +781,6 @@ st_MapRenderbuffer(struct gl_context *ctx,
struct st_renderbuffer *strb = st_renderbuffer(rb);
struct pipe_context *pipe = st->pipe;
const GLboolean invert = rb->Name == 0;
-   unsigned usage;
GLuint y2;
GLubyte *map;
 
@@ -800,13 +800,13 @@ st_MapRenderbuffer(struct gl_context *ctx,
   return;
}
 
-   usage = 0x0;
-   if (mode & GL_MAP_READ_BIT)
-  usage |= PIPE_TRANSFER_READ;
-   if (mode & GL_MAP_WRITE_BIT)
-  usage |= PIPE_TRANSFER_WRITE;
-   if (mode & GL_MAP_INVALIDATE_RANGE_BIT)
-  usage |= PIPE_TRANSFER_DISCARD_RANGE;
+   /* Check for unexpected flags */
+   assert((mode & ~(GL_MAP_READ_BIT |
+GL_MAP_WRITE_BIT |
+GL_MAP_INVALIDATE_RANGE_BIT)) == 0);
+
+   const enum pipe_transfer_usage transfer_flags =
+  st_access_flags_to_transfer_flags(mode, false);
 
/* Note: y=0=bottom of buffer while y2=0=top of buffer.
 * 'invert' will be true for window-system buffers and false for
@@ -821,7 +821,7 @@ st_MapRenderbuffer(struct gl_context *ctx,
 strb->texture,
 strb->surface->u.tex.level,
 strb->surface->u.tex.first_layer,
-usage, x, y2, w, h, &strb->transfer);
+transfer_flags, x, y2, w, h, &strb->transfer);
if (map) {
   if (invert) {
  *rowStrideOut = -(int) strb->transfer->stride;
diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 98f2443..6345ead 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -254,19 +254,18 @@ st_MapTextureImage(struct gl_context *ctx,
 {
struct st_context *st = st_context(ctx);
struct st_texture_image *stImage = st_texture_image(texImage);
-   unsigned pipeMode;
GLubyte *map;
struct pipe_transfer *transfer;
 
-   pipeMode = 0x0;
-   if (mode & GL_MAP_READ_BIT)
-  pipeMode |= PIPE_TRANSFER_READ;
-   if (mode & GL_MAP_WRITE_BIT)
-  pipeMode |= PIPE_TRANSFER_WRITE;
-   if (mode & GL_MAP_INVALIDATE_RANGE_BIT)
-  pipeMode |= PIPE_TRANSFER_DISCARD_RANGE;
+   /* Check for unexpected flags */
+   assert((mode & ~(GL_MAP_READ_BIT |
+GL_MAP_WRITE_BIT |
+GL_MAP_INVALIDATE_RANGE_BIT)) == 0);
 
-   map = st_texture_image_map(st, stImage, pipeMode, x, y, slice, w, h, 1,
+   const enum pipe_transfer_usage transfer_flags =
+  st_access_flags_to_transfer_flags(mode, false);
+
+   map = st_texture_image_map(st, stImage, transfer_flags, x, y, slice, w, h, 
1,
   &transfer);
if (map) {
   if (st_etc_fallback(st, texImage)) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] st/mesa: refactor bufferobj_data()

2018-02-05 Thread Brian Paul
Split out some of the code into three new helper functions:
buffer_target_to_bind_flags(), storage_flags_to_buffer_flags(),
buffer_usage() to make the code more manageable.
---
 src/mesa/state_tracker/st_cb_bufferobjects.c | 191 +++
 1 file changed, 104 insertions(+), 87 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c 
b/src/mesa/state_tracker/st_cb_bufferobjects.c
index a9104a9..6b64ba1 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -164,107 +164,84 @@ st_bufferobj_get_subdata(struct gl_context *ctx,
 offset, size, data);
 }
 
-static ALWAYS_INLINE GLboolean
-bufferobj_data(struct gl_context *ctx,
-   GLenum target,
-   GLsizeiptrARB size,
-   const void *data,
-   struct gl_memory_object *memObj,
-   GLuint64 offset,
-   GLenum usage,
-   GLbitfield storageFlags,
-   struct gl_buffer_object *obj)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_screen *screen = pipe->screen;
-   struct st_buffer_object *st_obj = st_buffer_object(obj);
-   struct st_memory_object *st_mem_obj = st_memory_object(memObj);
-   unsigned bind, pipe_usage, pipe_flags = 0;
-
-   if (target != GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD &&
-   size && st_obj->buffer &&
-   st_obj->Base.Size == size &&
-   st_obj->Base.Usage == usage &&
-   st_obj->Base.StorageFlags == storageFlags) {
-  if (data) {
- /* Just discard the old contents and write new data.
-  * This should be the same as creating a new buffer, but we avoid
-  * a lot of validation in Mesa.
-  */
- pipe->buffer_subdata(pipe, st_obj->buffer,
-  PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
-  0, size, data);
- return GL_TRUE;
-  } else if (screen->get_param(screen, PIPE_CAP_INVALIDATE_BUFFER)) {
- pipe->invalidate_resource(pipe, st_obj->buffer);
- return GL_TRUE;
-  }
-   }
-
-   st_obj->Base.Size = size;
-   st_obj->Base.Usage = usage;
-   st_obj->Base.StorageFlags = storageFlags;
 
+/**
+ * Return bitmask of PIPE_BIND_x flags corresponding a GL buffer target.
+ */
+static unsigned
+buffer_target_to_bind_flags(GLenum target)
+{
switch (target) {
case GL_PIXEL_PACK_BUFFER_ARB:
case GL_PIXEL_UNPACK_BUFFER_ARB:
-  bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
-  break;
+  return PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
case GL_ARRAY_BUFFER_ARB:
-  bind = PIPE_BIND_VERTEX_BUFFER;
-  break;
+  return PIPE_BIND_VERTEX_BUFFER;
case GL_ELEMENT_ARRAY_BUFFER_ARB:
-  bind = PIPE_BIND_INDEX_BUFFER;
-  break;
+  return PIPE_BIND_INDEX_BUFFER;
case GL_TEXTURE_BUFFER:
-  bind = PIPE_BIND_SAMPLER_VIEW;
-  break;
+  return PIPE_BIND_SAMPLER_VIEW;
case GL_TRANSFORM_FEEDBACK_BUFFER:
-  bind = PIPE_BIND_STREAM_OUTPUT;
-  break;
+  return PIPE_BIND_STREAM_OUTPUT;
case GL_UNIFORM_BUFFER:
-  bind = PIPE_BIND_CONSTANT_BUFFER;
-  break;
+  return PIPE_BIND_CONSTANT_BUFFER;
case GL_DRAW_INDIRECT_BUFFER:
case GL_PARAMETER_BUFFER_ARB:
-  bind = PIPE_BIND_COMMAND_ARGS_BUFFER;
-  break;
+  return PIPE_BIND_COMMAND_ARGS_BUFFER;
case GL_ATOMIC_COUNTER_BUFFER:
case GL_SHADER_STORAGE_BUFFER:
-  bind = PIPE_BIND_SHADER_BUFFER;
-  break;
+  return PIPE_BIND_SHADER_BUFFER;
case GL_QUERY_BUFFER:
-  bind = PIPE_BIND_QUERY_BUFFER;
-  break;
+  return PIPE_BIND_QUERY_BUFFER;
default:
-  bind = 0;
+  return 0;
}
+}
 
-   /* Set usage. */
-   if (st_obj->Base.Immutable) {
+
+/**
+ * Return bitmask of PIPE_RESOURCE_x flags corresponding to GL_MAP_x flags.
+ */
+static unsigned
+storage_flags_to_buffer_flags(GLbitfield storageFlags)
+{
+   unsigned flags = 0;
+   if (storageFlags & GL_MAP_PERSISTENT_BIT)
+  flags |= PIPE_RESOURCE_FLAG_MAP_PERSISTENT;
+   if (storageFlags & GL_MAP_COHERENT_BIT)
+  flags |= PIPE_RESOURCE_FLAG_MAP_COHERENT;
+   if (storageFlags & GL_SPARSE_STORAGE_BIT_ARB)
+  flags |= PIPE_RESOURCE_FLAG_SPARSE;
+   return flags;
+}
+
+
+/**
+ * From a buffer object's target, immutability flag, storage flags and
+ * usage hint, return a pipe_resource_usage value (PIPE_USAGE_DYNAMIC,
+ * STREAM, etc).
+ */
+static const enum pipe_resource_usage
+buffer_usage(GLenum target, GLboolean immutable,
+ GLbitfield storageFlags, GLenum usage)
+{
+   if (immutable) {
   /* BufferStorage */
   if (storageFlags & GL_CLIENT_STORAGE_BIT) {
  if (storageFlags & GL_MAP_READ_BIT)
-pipe_usage = PIPE_USAGE_STAGING;
+return PIPE_USAGE_STAGING;
  else
-pipe_usage = PIPE_USAGE_STREAM;
+return PIPE_USAGE_STREAM;
   } else 

[Mesa-dev] [Bug 104777] Attaching multiple shader objects for the same stage to a GLSL program triggers a linker error

2018-02-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104777

Juan A. Suarez  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #3 from Juan A. Suarez  ---
commit 4195eed961ccfe404ae81b9112189fc93a254ded
Author: Juan A. Suarez Romero 
Date:   Mon Feb 5 17:38:39 2018 +0100

glsl/linker: check same name is not used in block and outside

According to the OpenGL GLSL 3.20 spec, section 4.3.9:

  "It is a link-time error if any particular shader interface
   contains:
 - two different blocks, each having no instance name, and each
   having a member of the same name, or
 - a variable outside a block, and a block with no instance name,
   where the variable has the same name as a member in the block."

This fixes a previous commit 9b894c8 ("glsl/linker: link-error using the
same name in unnamed block and outside") that covered this case, but
did not take into account that precision qualifiers are ignored when
comparing blocks with no instance name.

With this commit, the original tests
KHR-GL*.shaders.uniform_block.common.name_matching keep fixed, and also
dEQP-GLES31.functional.shaders.linkage.uniform.block.differing_precision
regression is fixed, which was broken by previous commit.

v2: use helper variables (Matteo Bruni)

Fixes: 9b894c8 ("glsl/linker: link-error using the same name in unnamed
block and outside")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104668
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104777
CC: Mark Janes 
CC: "18.0" 
Tested-by: Matteo Bruni 
Reviewed-by: Tapani Pälli 
Signed-off-by: Juan A. Suarez Romero 

 src/compiler/glsl/linker.cpp | 53
++---
 1 file changed, 30 insertions(+), 23 deletions(-)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/8] drivers/radeonsi: enable uvd encode for HEVC main

2018-02-05 Thread James Zhu
Enable UVD encode for HEVC main profile

Signed-off-by: James Zhu 
---
 src/gallium/drivers/radeonsi/si_get.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_get.c 
b/src/gallium/drivers/radeonsi/si_get.c
index 8002362..64f76b4 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -24,6 +24,7 @@
 #include "si_pipe.h"
 #include "radeon/radeon_video.h"
 #include "radeon/radeon_vce.h"
+#include "radeon/radeon_uvd_enc.h"
 #include "ac_llvm_util.h"
 #include "vl/vl_decoder.h"
 #include "vl/vl_video_buffer.h"
@@ -587,7 +588,8 @@ static int si_get_video_param(struct pipe_screen *screen,
(si_vce_is_fw_version_supported(sscreen) ||
sscreen->info.family == CHIP_RAVEN)) ||
(profile == PIPE_VIDEO_PROFILE_HEVC_MAIN &&
-   sscreen->info.family == CHIP_RAVEN);
+   (sscreen->info.family == CHIP_RAVEN ||
+   si_radeon_uvd_enc_supported(sscreen)));
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/8] drivers/radeon:add uvd hevc enc functions

2018-02-05 Thread James Zhu
Implement UVD hevc encode functions

Signed-off-by: James Zhu 
---
 1   |  21 ++
 src/gallium/drivers/radeon/radeon_uvd_enc.c | 340 
 2 files changed, 361 insertions(+)
 create mode 100644 1
 create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c

diff --git a/1 b/1
new file mode 100644
index 000..51dd09e
--- /dev/null
+++ b/1
@@ -0,0 +1,21 @@
+r c80294d drivers/radeon:Add uvd hevc enc hw interface header
+pick 2d924d5 drivers/radeon:add uvd hevc enc hw ib implementation
+
+# Rebase f2b9031..2d924d5 onto f2b9031 (2 command(s))
+#
+# Commands:
+# p, pick = use commit
+# r, reword = use commit, but edit the commit message
+# e, edit = use commit, but stop for amending
+# s, squash = use commit, but meld into previous commit
+# f, fixup = like "squash", but discard this commit's log message
+# x, exec = run command (the rest of the line) using shell
+# d, drop = remove commit
+#
+# These lines can be re-ordered; they are executed from top to bottom.
+#
+# If you remove a line here THAT COMMIT WILL BE LOST.
+#
+# However, if you remove everything, the rebase will be aborted.
+#
+# Note that empty commits are commented out
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c 
b/src/gallium/drivers/radeon/radeon_uvd_enc.c
new file mode 100644
index 000..6eb6cda
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -0,0 +1,340 @@
+/**
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#include 
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, struct 
pipe_h265_enc_picture_desc *picture)
+{
+  struct pipe_h265_enc_picture_desc *pic = (struct 
pipe_h265_enc_picture_desc *)picture;
+  enc->enc_pic.picture_type = pic->picture_type;
+  enc->enc_pic.frame_num = pic->frame_num;
+  enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+  enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+  enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
+  enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
+  enc->enc_pic.not_referenced = pic->not_referenced;
+  enc->enc_pic.is_idr = (pic->picture_type == 
PIPE_H265_ENC_PICTURE_TYPE_IDR) ||
+(pic->picture_type == 
PIPE_H265_ENC_PICTURE_TYPE_I);
+  enc->enc_pic.crop_left = 0;
+  enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) 
/ 2;
+  enc->enc_pic.crop_top = 0;
+  enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - 
enc->base.height) / 2;
+  enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
+  enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
+  enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
+  enc->enc_pic.max_poc = pic->seq.intra_period;
+  enc->enc_pic.log2_max_poc = 0;
+  for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++)
+ i = (i >> 1);
+  enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
+  enc->enc_pic.pic_width_in_luma_samples = 
pic->seq.pic_width_in_luma_samples;
+  enc->enc_pic.pic_height_in_luma_samples = 
pic->seq.pic_height_in_luma_samples;
+  enc->enc_pic.log2_diff_max_min_luma_coding_block_size = 
pic->seq.log2_diff_max_min_luma_coding_block_size;
+  enc->enc_pic.log2_min_transform_block_size_minus2 = 
pic->seq.log2_min_transform_block_size_minus2;
+  

[Mesa-dev] [PATCH 2/8] amdgpu/drm:add uvd hevc enc support in amdgpu cs

2018-02-05 Thread James Zhu
Signed-off-by: James Zhu 
---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 1927a3a..6f305b7 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -376,6 +376,7 @@ static bool amdgpu_cs_has_user_fence(struct 
amdgpu_cs_context *cs)
 {
return cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD &&
   cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE &&
+  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC &&
   cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC &&
   cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC;
 }
@@ -818,6 +819,10 @@ static bool amdgpu_init_cs_context(struct 
amdgpu_cs_context *cs,
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD;
   break;
 
+   case RING_UVD_ENC:
+  cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC;
+  break;
+
case RING_VCE:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE;
   break;
@@ -1533,6 +1538,7 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
   ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
   break;
case RING_UVD:
+   case RING_UVD_ENC:
   while (rcs->current.cdw & 15)
  radeon_emit(rcs, 0x8000); /* type2 nop packet */
   break;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/8] amd/common:add uvd hevc enc support check in hw query

2018-02-05 Thread James Zhu
Use the amdgpu hardware query information to check whether UVD HEVC encode is supported.

Signed-off-by: James Zhu 
---
 src/amd/common/ac_gpu_info.c | 10 +-
 src/amd/common/ac_gpu_info.h |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 6d9dcb5..2494967 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -98,7 +98,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 {
struct amdgpu_buffer_size_alignments alignment_info = {};
struct amdgpu_heap_info vram, vram_vis, gtt;
-   struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = 
{}, vcn_dec = {}, vcn_enc = {};
+   struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, uvd_enc 
= {}, vce = {}, vcn_dec = {}, vcn_enc = {};
uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature 
= 0;
int r, i, j;
drmDevicePtr devinfo;
@@ -166,6 +166,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
return false;
}
 
+   r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, &uvd_enc);
+   if (r) {
+   fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) 
failed.\n");
+   return false;
+   }
+
if (info->drm_major == 3 && info->drm_minor >= 17) {
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, 
&vcn_dec);
if (r) {
@@ -275,6 +281,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
uvd.available_rings ? uvd_version : 0;
info->vce_fw_version =
vce.available_rings ? vce_version : 0;
+   info->uvd_enc_supported =
+   uvd_enc.available_rings ? true : false;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
info->has_syncobj_wait_for_submit = info->has_syncobj && 
info->drm_minor >= 20;
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index cca3e98..6b120d1 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -65,6 +65,7 @@ struct radeon_info {
uint32_t num_compute_rings;
uint32_t uvd_fw_version;
uint32_t vce_fw_version;
+   bool uvd_enc_supported;
uint32_t me_fw_version;
uint32_t me_fw_feature;
uint32_t pfp_fw_version;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >