[Mesa-dev] [PATCH] mesa: Factor out _mesa_disable_vertex_array_attrib.
From: Mathias FröhlichHi, Simple code deduplication and factoring out a function that will be usefull soon. please review thanks!! Mathias And use it in the enable code path. Move _mesa_update_attribute_map_mode into its only remaining file. Signed-off-by: Mathias Fröhlich --- src/mesa/main/arrayobj.h | 26 src/mesa/main/enable.c | 64 ++-- src/mesa/main/varray.c | 58 --- src/mesa/main/varray.h | 7 ++ 4 files changed, 75 insertions(+), 80 deletions(-) diff --git a/src/mesa/main/arrayobj.h b/src/mesa/main/arrayobj.h index 411ed65c50..5de74505bb 100644 --- a/src/mesa/main/arrayobj.h +++ b/src/mesa/main/arrayobj.h @@ -99,32 +99,6 @@ extern const GLubyte _mesa_vao_attribute_map[ATTRIBUTE_MAP_MODE_MAX][VERT_ATTRIB_MAX]; -/** - * Depending on the position and generic0 attributes enable flags select - * the one that is used for both attributes. - * The generic0 attribute takes precedence. - */ -static inline void -_mesa_update_attribute_map_mode(const struct gl_context *ctx, -struct gl_vertex_array_object *vao) -{ - /* -* There is no need to change the mapping away from the -* identity mapping if we are not in compat mode. -*/ - if (ctx->API != API_OPENGL_COMPAT) - return; - /* The generic0 attribute superseeds the position attribute */ - const GLbitfield enabled = vao->_Enabled; - if (enabled & VERT_BIT_GENERIC0) - vao->_AttributeMapMode = ATTRIBUTE_MAP_MODE_GENERIC0; - else if (enabled & VERT_BIT_POS) - vao->_AttributeMapMode = ATTRIBUTE_MAP_MODE_POSITION; - else - vao->_AttributeMapMode = ATTRIBUTE_MAP_MODE_IDENTITY; -} - - /** * Apply the position/generic0 aliasing map to a bitfield from the vao. 
* Use for example to convert gl_vertex_array_object::_Enabled diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index bc22410bda..967d23080c 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -40,6 +40,7 @@ #include "mtypes.h" #include "enums.h" #include "texstate.h" +#include "varray.h" @@ -58,55 +59,56 @@ update_derived_primitive_restart_state(struct gl_context *ctx) || ctx->Array.PrimitiveRestartFixedIndex; } + +/** + * Helper to enable/disable VAO client-side state. + */ +static void +vao_state(struct gl_context *ctx, gl_vert_attrib attr, GLboolean state) +{ + if (state) + _mesa_enable_vertex_array_attrib(ctx, ctx->Array.VAO, attr); + else + _mesa_disable_vertex_array_attrib(ctx, ctx->Array.VAO, attr); +} + + /** * Helper to enable/disable client-side state. */ static void client_state(struct gl_context *ctx, GLenum cap, GLboolean state) { - struct gl_vertex_array_object *vao = ctx->Array.VAO; - GLbitfield vert_attrib_bit; - GLboolean *enable_var; - switch (cap) { case GL_VERTEX_ARRAY: - enable_var = >VertexAttrib[VERT_ATTRIB_POS].Enabled; - vert_attrib_bit = VERT_BIT_POS; + vao_state(ctx, VERT_ATTRIB_POS, state); break; case GL_NORMAL_ARRAY: - enable_var = >VertexAttrib[VERT_ATTRIB_NORMAL].Enabled; - vert_attrib_bit = VERT_BIT_NORMAL; + vao_state(ctx, VERT_ATTRIB_NORMAL, state); break; case GL_COLOR_ARRAY: - enable_var = >VertexAttrib[VERT_ATTRIB_COLOR0].Enabled; - vert_attrib_bit = VERT_BIT_COLOR0; + vao_state(ctx, VERT_ATTRIB_COLOR0, state); break; case GL_INDEX_ARRAY: - enable_var = >VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled; - vert_attrib_bit = VERT_BIT_COLOR_INDEX; + vao_state(ctx, VERT_ATTRIB_COLOR_INDEX, state); break; case GL_TEXTURE_COORD_ARRAY: - enable_var = >VertexAttrib[VERT_ATTRIB_TEX(ctx->Array.ActiveTexture)].Enabled; - vert_attrib_bit = VERT_BIT_TEX(ctx->Array.ActiveTexture); + vao_state(ctx, VERT_ATTRIB_TEX(ctx->Array.ActiveTexture), state); break; case GL_EDGE_FLAG_ARRAY: - enable_var = 
>VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled; - vert_attrib_bit = VERT_BIT_EDGEFLAG; + vao_state(ctx, VERT_ATTRIB_EDGEFLAG, state); break; case GL_FOG_COORDINATE_ARRAY_EXT: - enable_var = >VertexAttrib[VERT_ATTRIB_FOG].Enabled; - vert_attrib_bit = VERT_BIT_FOG; + vao_state(ctx, VERT_ATTRIB_FOG, state); break; case GL_SECONDARY_COLOR_ARRAY_EXT: - enable_var = >VertexAttrib[VERT_ATTRIB_COLOR1].Enabled; - vert_attrib_bit = VERT_BIT_COLOR1; + vao_state(ctx, VERT_ATTRIB_COLOR1, state); break; case GL_POINT_SIZE_ARRAY_OES: - enable_var = >VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled; - vert_attrib_bit = VERT_BIT_POINT_SIZE; FLUSH_VERTICES(ctx, _NEW_PROGRAM); ctx->VertexProgram.PointSizeEnabled =
[Mesa-dev] [PATCH] mesa: Use atomics for buffer objects reference counts.
From: Mathias FröhlichHi all, please review best Mathias The mutex is currently used for reference counting and updating the minmax index cache. The change uses atomics directly for reference counting and the mutex for the minmax cache. This is safe since the reference count is not modified beside in _mesa_reference_buffer_object where atomics aim to be used. While using the minmax cache, the calling code holds a reference to the buffer object. Thus unreferencing or even referencing the buffer object does not need to be serialized with accessing the minmax cache. The change reduces the time _mesa_reference_buffer_object_ takes by about a factor of two when looking at perf results for some of my favorite use cases. Signed-off-by: Mathias Fröhlich --- src/mesa/main/bufferobj.c | 22 ++ src/mesa/main/mtypes.h | 2 +- src/mesa/vbo/vbo_minmax_index.c | 8 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index c1dfdfba82..67f9cd0a90 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -46,6 +46,7 @@ #include "texstore.h" #include "transformfeedback.h" #include "varray.h" +#include "util/u_atomic.h" /* Debug flags */ @@ -471,7 +472,7 @@ _mesa_delete_buffer_object(struct gl_context *ctx, bufObj->RefCount = -1000; bufObj->Name = ~0; - simple_mtx_destroy(>Mutex); + simple_mtx_destroy(>MinMaxCacheMutex); free(bufObj->Label); free(bufObj); } @@ -490,16 +491,9 @@ _mesa_reference_buffer_object_(struct gl_context *ctx, { if (*ptr) { /* Unreference the old buffer */ - GLboolean deleteFlag = GL_FALSE; struct gl_buffer_object *oldObj = *ptr; - simple_mtx_lock(>Mutex); - assert(oldObj->RefCount > 0); - oldObj->RefCount--; - deleteFlag = (oldObj->RefCount == 0); - simple_mtx_unlock(>Mutex); - - if (deleteFlag) { + if (p_atomic_dec_zero(>RefCount)) { assert(ctx->Driver.DeleteBuffer); ctx->Driver.DeleteBuffer(ctx, oldObj); } @@ -510,12 +504,8 @@ _mesa_reference_buffer_object_(struct gl_context *ctx, 
if (bufObj) { /* reference new buffer */ - simple_mtx_lock(>Mutex); - assert(bufObj->RefCount > 0); - - bufObj->RefCount++; + p_atomic_inc(>RefCount); *ptr = bufObj; - simple_mtx_unlock(>Mutex); } } @@ -547,11 +537,11 @@ _mesa_initialize_buffer_object(struct gl_context *ctx, GLuint name) { memset(obj, 0, sizeof(struct gl_buffer_object)); - simple_mtx_init(>Mutex, mtx_plain); obj->RefCount = 1; obj->Name = name; obj->Usage = GL_STATIC_DRAW_ARB; + simple_mtx_init(>MinMaxCacheMutex, mtx_plain); if (get_no_minmax_cache()) obj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE; } @@ -870,7 +860,7 @@ _mesa_init_buffer_objects( struct gl_context *ctx ) GLuint i; memset(, 0, sizeof(DummyBufferObject)); - simple_mtx_init(, mtx_plain); + simple_mtx_init(, mtx_plain); DummyBufferObject.RefCount = 1000*1000*1000; /* never delete */ _mesa_reference_buffer_object(ctx, >Array.ArrayBufferObj, diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 3a67d43420..b6d606386e 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1443,7 +1443,6 @@ typedef enum */ struct gl_buffer_object { - simple_mtx_t Mutex; GLint RefCount; GLuint Name; GLchar *Label; /**< GL_KHR_debug */ @@ -1464,6 +1463,7 @@ struct gl_buffer_object struct gl_buffer_mapping Mappings[MAP_COUNT]; /** Memoization of min/max index computations for static index buffers */ + simple_mtx_t MinMaxCacheMutex; struct hash_table *MinMaxCache; unsigned MinMaxCacheHitIndices; unsigned MinMaxCacheMissIndices; diff --git a/src/mesa/vbo/vbo_minmax_index.c b/src/mesa/vbo/vbo_minmax_index.c index c9d2020167..d1298dcdc3 100644 --- a/src/mesa/vbo/vbo_minmax_index.c +++ b/src/mesa/vbo/vbo_minmax_index.c @@ -115,7 +115,7 @@ vbo_get_minmax_cached(struct gl_buffer_object *bufferObj, if (!vbo_use_minmax_cache(bufferObj)) return GL_FALSE; - simple_mtx_lock(>Mutex); + simple_mtx_lock(>MinMaxCacheMutex); if (bufferObj->MinMaxCacheDirty) { /* Disable the cache permanently for this BO if the number of hits @@ -166,7 +166,7 @@ 
out_invalidate: } out_disable: - simple_mtx_unlock(>Mutex); + simple_mtx_unlock(>MinMaxCacheMutex); return found; } @@ -184,7 +184,7 @@ vbo_minmax_cache_store(struct gl_context *ctx, if (!vbo_use_minmax_cache(bufferObj)) return; - simple_mtx_lock(>Mutex); + simple_mtx_lock(>MinMaxCacheMutex); if (!bufferObj->MinMaxCache) { bufferObj->MinMaxCache = @@ -223,7 +223,7 @@ vbo_minmax_cache_store(struct gl_context *ctx, free(entry); out: - simple_mtx_unlock(>Mutex); +
[Mesa-dev] [PATCH] vbo: Move vbo_rebase into its only caller module tnl.
From: Mathias FröhlichHi all, The change move vbo_rebase_prims into the tnl module. The tnl module is the only user of this function. please review best Mathias Signed-off-by: Mathias Fröhlich --- src/mesa/Makefile.sources | 3 ++- src/mesa/meson.build | 2 +- src/mesa/tnl/t_draw.c | 7 ++--- src/mesa/{vbo/vbo_rebase.c => tnl/t_rebase.c} | 18 ++--- src/mesa/tnl/t_rebase.h | 39 +++ src/mesa/vbo/vbo.h| 11 6 files changed, 55 insertions(+), 25 deletions(-) rename src/mesa/{vbo/vbo_rebase.c => tnl/t_rebase.c} (94%) create mode 100644 src/mesa/tnl/t_rebase.h diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 880f379eb1..0a9aad52d0 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -370,6 +370,8 @@ TNL_FILES = \ tnl/tnl.h \ tnl/t_pipeline.c \ tnl/t_pipeline.h \ + tnl/t_rebase.c \ + tnl/t_rebase.h \ tnl/t_vb_cliptmp.h \ tnl/t_vb_fog.c \ tnl/t_vb_light.c \ @@ -405,7 +407,6 @@ VBO_FILES = \ vbo/vbo_noop.h \ vbo/vbo_primitive_restart.c \ vbo/vbo_private.h \ - vbo/vbo_rebase.c \ vbo/vbo_save_api.c \ vbo/vbo_save.c \ vbo/vbo_save_draw.c \ diff --git a/src/mesa/meson.build b/src/mesa/meson.build index a74c39d29e..aa27d59264 100644 --- a/src/mesa/meson.build +++ b/src/mesa/meson.build @@ -338,7 +338,6 @@ files_libmesa_common = files( 'vbo/vbo_noop.c', 'vbo/vbo_noop.h', 'vbo/vbo_primitive_restart.c', - 'vbo/vbo_rebase.c', 'vbo/vbo_save_api.c', 'vbo/vbo_save.c', 'vbo/vbo_save_draw.c', @@ -366,6 +365,7 @@ files_libmesa_classic = files( 'tnl/tnl.h', 'tnl/t_pipeline.c', 'tnl/t_pipeline.h', + 'tnl/t_rebase.c', 'tnl/t_vb_cliptmp.h', 'tnl/t_vb_fog.c', 'tnl/t_vb_light.c', diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index 9fca4da1f4..c19d77d641 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -38,6 +38,7 @@ #include "util/half_float.h" #include "t_context.h" +#include "t_rebase.h" #include "tnl.h" @@ -461,9 +462,9 @@ void _tnl_draw_prims(struct gl_context *ctx, if (min_index) { /* We always translate away calls with 
min_index != 0. */ - vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, - min_index, max_index, - _tnl_draw_prims ); + t_rebase_prims( ctx, arrays, prim, nr_prims, ib, + min_index, max_index, + _tnl_draw_prims ); return; } else if ((GLint)max_index + max_basevertex > max) { diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/tnl/t_rebase.c similarity index 94% rename from src/mesa/vbo/vbo_rebase.c rename to src/mesa/tnl/t_rebase.c index 02dbc68dcb..b781781cb0 100644 --- a/src/mesa/vbo/vbo_rebase.c +++ b/src/mesa/tnl/t_rebase.c @@ -51,7 +51,7 @@ #include "main/imports.h" #include "main/mtypes.h" -#include "vbo.h" +#include "t_rebase.h" #define REBASE(TYPE) \ @@ -100,14 +100,14 @@ REBASE(GLubyte) *- can't save time by trying to upload half a vbo - typically it is * all or nothing. */ -void vbo_rebase_prims( struct gl_context *ctx, - const struct gl_vertex_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint min_index, - GLuint max_index, - vbo_draw_func draw ) +void t_rebase_prims( struct gl_context *ctx, + const struct gl_vertex_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index, + vbo_draw_func draw ) { struct gl_vertex_array tmp_arrays[VERT_ATTRIB_MAX]; const struct gl_vertex_array *tmp_array_pointers[VERT_ATTRIB_MAX]; diff --git a/src/mesa/tnl/t_rebase.h b/src/mesa/tnl/t_rebase.h new file mode 100644 index 00..35175868d5 --- /dev/null +++ b/src/mesa/tnl/t_rebase.h @@ -0,0 +1,39 @@ +/* + * mesa 3-D graphics library + * + * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to
Re: [Mesa-dev] [PATCH] r600/atomic: fix ATOMCAS instruction.
Am 05.02.2018 um 07:47 schrieb Dave Airlie: > From: Dave Airlie> > This has 3 srcs. Depends on how you count :-). Reviewed-by: Roland Scheidegger > > This fixes: > KHR-GL45.shader_atomic_counter_ops_tests.ShaderAtomicCounterOpsExchangeTestCase > > Signed-off-by: Dave Airlie > --- > src/gallium/drivers/r600/r600_shader.c | 32 +++- > 1 file changed, 31 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 33eb5accea..4c0d554d1a 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -8698,6 +8698,33 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx > *ctx) > if (r) > return r; > > + if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET) { > + if (inst->Src[3].Register.File == TGSI_FILE_IMMEDIATE) { > + int value = (ctx->literals[4 * > inst->Src[3].Register.Index + inst->Src[3].Register.SwizzleX]); > + memset(, 0, sizeof(struct r600_bytecode_alu)); > + alu.op = ALU_OP1_MOV; > + alu.dst.sel = ctx->temp_reg; > + alu.dst.chan = is_cm ? 2 : 1; > + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; > + alu.src[0].value = value; > + alu.last = 1; > + alu.dst.write = 1; > + r = r600_bytecode_add_alu(ctx->bc, ); > + if (r) > + return r; > + } else { > + memset(, 0, sizeof(struct r600_bytecode_alu)); > + alu.op = ALU_OP1_MOV; > + alu.dst.sel = ctx->temp_reg; > + alu.dst.chan = is_cm ? 2 : 1; > + r600_bytecode_src([0], >src[3], 0); > + alu.last = 1; > + alu.dst.write = 1; > + r = r600_bytecode_add_alu(ctx->bc, ); > + if (r) > + return r; > + } > + } > if (inst->Src[2].Register.File == TGSI_FILE_IMMEDIATE) { > int value = (ctx->literals[4 * inst->Src[2].Register.Index + > inst->Src[2].Register.SwizzleX]); > int abs_value = abs(value); > @@ -8737,7 +8764,10 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx > *ctx) > gds.src_gpr2 = 0; > gds.src_sel_x = is_cm ? 0 : 4; > gds.src_sel_y = is_cm ? 
1 : 0; > - gds.src_sel_z = 7; > + if (gds_op == FETCH_OP_GDS_CMP_XCHG_RET) > + gds.src_sel_z = is_cm ? 2 : 1; > + else > + gds.src_sel_z = 7; > gds.dst_sel_x = 0; > gds.dst_sel_y = 7; > gds.dst_sel_z = 7; > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/9] r600: overhaul buffer resource query.
Am 06.02.2018 um 06:04 schrieb Dave Airlie: > On 6 February 2018 at 14:12, Roland Scheidegger wrote: >> Am 05.02.2018 um 05:29 schrieb Dave Airlie: >>> From: Dave Airlie >>> >>> This cleans up and fixes the previous fix even more. >>> >>> Buffers from textures start at max const, >>> buffers from buffers/images come in from the 168 offset. >>> >>> This fixes a bunch of: >>> KHR-GL45.shader_storage_buffer_object* >>> >>> Signed-off-by: Dave Airlie >>> --- >>> src/gallium/drivers/r600/r600_shader.c | 15 --- >>> 1 file changed, 8 insertions(+), 7 deletions(-) >>> >>> diff --git a/src/gallium/drivers/r600/r600_shader.c >>> b/src/gallium/drivers/r600/r600_shader.c >>> index 8c4460a5d5..32f24c071d 100644 >>> --- a/src/gallium/drivers/r600/r600_shader.c >>> +++ b/src/gallium/drivers/r600/r600_shader.c >>> @@ -7007,7 +7007,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx >>> *ctx, boolean src_requires_l >>> return 0; >>> } >>> >>> -static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, >>> int offset) >>> +static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, >>> int offset, int eg_buffer_base) >> >> I think it would be nicer if you'd just stick to the offset parameter >> here. Just add both together in the caller - as far as this function is >> concerned two offsets don't really make sense. > > > Well it matters for the r600 path if I'm not mistaken. It just wants > id + offset, not the MAX_CONST or > other bits. > Ah yes, right. Maybe things would be more obvious if r600_do_buffer_txq would just figure out offsets on its own? It already knows what the src reg is, so determining offset for sampler/buffer/image could easily be done there. But as long as it works... Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] r600/sb/cayman: fix indirect ubo access on cayman
Am 05.02.2018 um 07:05 schrieb Dave Airlie: > From: Dave Airlie > > With sb enabled on cayman, this was overwriting the proper > cf index value with random ones if the dst gpr was 2 or 3, > only save the value for a MOVA instruction. > > Fixes: > KHR-GL45.gpu_shader5.uniform_blocks_array_indexing > (on cayman with sb) > > Signed-off-by: Dave Airlie Reviewed-by: Roland Scheidegger > --- > src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > index 970e4141d5..87035ee2a6 100644 > --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > @@ -567,7 +567,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, > alu_group_node *g) { > n->src.push_back(get_cf_index_value(1)); > } > > - if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr > == CM_V_SQ_MOVA_DST_CF_IDX1) && > + if ((flags & AF_MOVA) && (n->bc.dst_gpr == > CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) && > ctx.is_cayman()) > // Move CF_IDX value into tex instruction operands, > scheduler will later re-emit setting of CF_IDX > save_set_cf_index(n->src[0], n->bc.dst_gpr == > CM_V_SQ_MOVA_DST_CF_IDX1); > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] mesa: add xbgr support adjacent to xrgb
On Mon, Feb 5, 2018 at 6:42 AM, Ilia Mirkin wrote: > On Mon, Feb 5, 2018 at 6:24 AM, Daniel Stone wrote: >> Hi Ilia, >> >> On 4 February 2018 at 19:09, Ilia Mirkin wrote: >>> One might have split this up into multiple patches, but it's just very >>> repetitive and similar code. >> >> You probably want to add this into gbm_to_dri_image_formats[] inside >> src/gbm/backends/dri/gbm_dri.c, so Wayland compositors running on KMS >> and xf86-video-modesetting can also use these modes. >> >> Grepping for XRGB210 inside src/egl/drivers/dri2/platform_wayland.c >> would also show the fairly obvious points to add it there. > > I thought I covered all the places where this was done, but obviously > not. Perhaps in my euphoria of getting glxgears displaying the correct > colors I decided I was done. > > I'll do another passthrough, and definitely hit that file explicitly. So I've been looking at platform_wayland and platform_x11, and it's not totally clear to me how to make it all work. I'm stuck with e.g. dri2_wl_create_window_surface dri3_create_image_khr_pixmap and others, wrt how to determine whether I should pick a XBGR or XRGB format. I suspect that more information has to be stored somewhere. Perhaps the _EGLConfig? Even that wouldn't cover everything though. This is all quite foreign to me, and I'm not sure how it's supposed to all fit together. Perhaps it can be left until later? -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] r600: fixup sparse color exports.
Am 05.02.2018 um 05:58 schrieb Dave Airlie: > From: Dave Airlie> > If we have gaps in the shader mask we have to have 0x1 in them > according to a comment in radeonsi, and this is required to fix > the test at least on cayman. > > We also need to record the highest one written to write to the > ps exports reg. > > This fixes: > KHR-GL45.enhanced_layouts.fragment_data_location_api Does that mean there's actually a performance benefit when there's no gaps? That's something the APIs wouldn't tell you :-). Reviewed-by: Roland Scheidegger > > Signed-off-by: Dave Airlie > --- > src/gallium/drivers/r600/evergreen_state.c | 2 +- > src/gallium/drivers/r600/r600_shader.c | 10 ++ > src/gallium/drivers/r600/r600_shader.h | 1 + > 3 files changed, 12 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index 4c9163c2a7..742ca5babb 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -3369,7 +3369,7 @@ void evergreen_update_ps_state(struct pipe_context > *ctx, struct r600_pipe_shader > exports_ps |= 1; > } > > - num_cout = rshader->nr_ps_color_exports; > + num_cout = rshader->ps_export_highest + 1; > > exports_ps |= S_02884C_EXPORT_COLORS(num_cout); > if (!exports_ps) { > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 72e3063804..33eb5accea 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -3876,6 +3876,16 @@ static int r600_shader_from_tgsi(struct r600_context > *rctx, > output[j].type = > V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; > shader->nr_ps_color_exports++; > shader->ps_color_export_mask |= (0xf << > (shader->output[i].sid * 4)); > + > + /* If the i-th target format is set, > all previous target formats must > + * be non-zero to avoid hangs. - from > radeonsi, seems to apply to eg as well. 
> + */ > + if (shader->output[i].sid > 0) > + for (unsigned x = 0; x < > shader->output[i].sid; x++) > + > shader->ps_color_export_mask |= (1 << (x*4)); > + > + if (shader->output[i].sid > > shader->ps_export_highest) > + shader->ps_export_highest = > shader->output[i].sid; > if (shader->fs_write_all && > (rscreen->b.chip_class >= EVERGREEN)) { > for (k = 1; k < > max_color_exports; k++) { > j++; > diff --git a/src/gallium/drivers/r600/r600_shader.h > b/src/gallium/drivers/r600/r600_shader.h > index 7fca3f455e..4b23facf6f 100644 > --- a/src/gallium/drivers/r600/r600_shader.h > +++ b/src/gallium/drivers/r600/r600_shader.h > @@ -85,6 +85,7 @@ struct r600_shader { > /* Real number of ps color exports compiled in the bytecode */ > unsignednr_ps_color_exports; > unsignedps_color_export_mask; > + unsignedps_export_highest; > /* bit n is set if the shader writes gl_ClipDistance[n] */ > unsignedcc_dist_mask; > unsignedclip_dist_write; > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/9] r600: overhaul buffer resource query.
On 6 February 2018 at 14:12, Roland Scheidegger wrote: > Am 05.02.2018 um 05:29 schrieb Dave Airlie: >> From: Dave Airlie >> >> This cleans up and fixes the previous fix even more. >> >> Buffers from textures start at max const, >> buffers from buffers/images come in from the 168 offset. >> >> This fixes a bunch of: >> KHR-GL45.shader_storage_buffer_object* >> >> Signed-off-by: Dave Airlie >> --- >> src/gallium/drivers/r600/r600_shader.c | 15 --- >> 1 file changed, 8 insertions(+), 7 deletions(-) >> >> diff --git a/src/gallium/drivers/r600/r600_shader.c >> b/src/gallium/drivers/r600/r600_shader.c >> index 8c4460a5d5..32f24c071d 100644 >> --- a/src/gallium/drivers/r600/r600_shader.c >> +++ b/src/gallium/drivers/r600/r600_shader.c >> @@ -7007,7 +7007,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx >> *ctx, boolean src_requires_l >> return 0; >> } >> >> -static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int >> offset) >> +static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int >> offset, int eg_buffer_base) > > I think it would be nicer if you'd just stick to the offset parameter > here. Just add both together in the caller - as far as this function is > concerned two offsets don't really make sense. Well it matters for the r600 path if I'm not mistaken. It just wants id + offset, not the MAX_CONST or other bits. Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] ac: add 64bit support to ac_find_lsb()
--- src/amd/common/ac_llvm_build.c | 22 -- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 08c488775e..0764d8c7f9 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1984,6 +1984,20 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0) { + unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); + const char *intrin_name; + LLVMTypeRef type; + LLVMValueRef zero; + if (src0_bitsize == 64) { + intrin_name = "llvm.cttz.i64"; + type = ctx->i64; + zero = ctx->i64_0; + } else { + intrin_name = "llvm.cttz.i32"; + type = ctx->i32; + zero = ctx->i32_0; + } + LLVMValueRef params[2] = { src0, @@ -1999,15 +2013,19 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMConstInt(ctx->i1, 1, false), }; - LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, + LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE); + if (src0_bitsize == 64) { + lsb = ac_unpack_64_2x32_split_x(ctx, lsb); + } + /* TODO: We need an intrinsic to skip this conditional. */ /* Check for zero: */ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, - ctx->i32_0, ""), + zero, ""), LLVMConstInt(ctx->i32, -1, 0), lsb, ""); } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] ac: create ac_unpack_64_2x32_split_x() helper
This will be used in the following commits. --- src/amd/common/ac_llvm_build.c | 8 src/amd/common/ac_llvm_build.h | 3 +++ src/amd/common/ac_nir_to_llvm.c | 6 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index a86ba962fa..6375b106f7 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1992,6 +1992,14 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMConstInt(ctx->i32, -1, 0), lsb, ""); } +LLVMValueRef ac_unpack_64_2x32_split_x(struct ac_llvm_context *ctx, + LLVMValueRef src0) +{ + LLVMValueRef tmp = LLVMBuildBitCast(ctx->builder, src0, + ctx->v2i32, ""); + return LLVMBuildExtractElement(ctx->builder, tmp, ctx->i32_0, ""); +} + LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type) { return LLVMPointerType(LLVMArrayType(elem_type, 0), diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 47c843fb4b..78991b3e99 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -348,6 +348,9 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0); +LLVMValueRef ac_unpack_64_2x32_split_x(struct ac_llvm_context *ctx, + LLVMValueRef src0); + LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type); #ifdef __cplusplus diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 9a9db2dce9..ac4af12b3e 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2042,11 +2042,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_unpack_64_2x32_split_x: { assert(instr->src[0].src.ssa->num_components == 1); - LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], - ctx->ac.v2i32, - ""); - result = LLVMBuildExtractElement(ctx->ac.builder, tmp, -ctx->ac.i32_0, ""); + result = ac_unpack_64_2x32_split_x(>ac, src[0]); break; } -- 2.14.3 ___ mesa-dev mailing 
list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] ac: move get_elem_bits() to ac_llvm_build.c
--- src/amd/common/ac_llvm_build.c | 19 +++ src/amd/common/ac_llvm_build.h | 3 +++ src/amd/common/ac_nir_to_llvm.c | 34 -- 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 6375b106f7..08c488775e 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -128,6 +128,25 @@ ac_llvm_extract_elem(struct ac_llvm_context *ac, LLVMConstInt(ac->i32, index, false), ""); } +int +ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type) +{ + if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) + type = LLVMGetElementType(type); + + if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind) + return LLVMGetIntTypeWidth(type); + + if (type == ctx->f16) + return 16; + if (type == ctx->f32) + return 32; + if (type == ctx->f64) + return 64; + + unreachable("Unhandled type kind in get_elem_bits"); +} + unsigned ac_get_type_size(LLVMTypeRef type) { diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 78991b3e99..fa09bd10a5 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -92,6 +92,9 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context, int ac_get_llvm_num_components(LLVMValueRef value); +int +ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type); + LLVMValueRef ac_llvm_extract_elem(struct ac_llvm_context *ac, LLVMValueRef value, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index e06a22f8a9..e284795fdc 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -329,24 +329,6 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, return main_function; } -static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type) -{ - if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) - type = LLVMGetElementType(type); - - if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind) - return LLVMGetIntTypeWidth(type); - - if (type == 
ctx->f16) - return 16; - if (type == ctx->f32) - return 32; - if (type == ctx->f64) - return 64; - - unreachable("Unhandled type kind in get_elem_bits"); -} - static LLVMValueRef unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift, unsigned bitwidth) @@ -1267,7 +1249,7 @@ static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, }; MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, -get_elem_bits(ctx, result_type)); +ac_get_elem_bits(ctx, result_type)); assert(length < sizeof(name)); return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); } @@ -1284,7 +1266,7 @@ static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, }; MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, -get_elem_bits(ctx, result_type)); +ac_get_elem_bits(ctx, result_type)); assert(length < sizeof(name)); return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); } @@ -1302,7 +1284,7 @@ static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, }; MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, -get_elem_bits(ctx, result_type)); +ac_get_elem_bits(ctx, result_type)); assert(length < sizeof(name)); return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); } @@ -1922,7 +1904,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = ac_build_intrinsic(>ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_bit_count: - if (get_elem_bits(>ac, LLVMTypeOf(src[0])) == 32) + if (ac_get_elem_bits(>ac, LLVMTypeOf(src[0])) == 32) result = ac_build_intrinsic(>ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); else { result = ac_build_intrinsic(>ac, "llvm.ctpop.i64", ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE); @@ -1966,7 +1948,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr) case nir_op_u2u32:
[Mesa-dev] [PATCH 2/4] ac: add 64bit bitCount support
--- src/amd/common/ac_nir_to_llvm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index ac4af12b3e..e06a22f8a9 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1922,7 +1922,12 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = ac_build_intrinsic(>ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_bit_count: - result = ac_build_intrinsic(>ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); + if (get_elem_bits(>ac, LLVMTypeOf(src[0])) == 32) + result = ac_build_intrinsic(>ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); + else { + result = ac_build_intrinsic(>ac, "llvm.ctpop.i64", ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE); + result = ac_unpack_64_2x32_split_x(>ac, result); + } break; case nir_op_vec2: case nir_op_vec3: -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 9/9] r600: work out target mask at framebuffer bind.
For 7-9/9 Reviewed-by: Roland ScheideggerAm 05.02.2018 um 05:29 schrieb Dave Airlie: > From: Dave Airlie > > If we only get 1,2,3,6 framebuffers we want a sparse target mask. > > Signed-off-by: Dave Airlie > --- > src/gallium/drivers/r600/evergreen_state.c | 10 +++--- > src/gallium/drivers/r600/r600_pipe.h | 1 + > src/gallium/drivers/r600/r600_state.c | 2 +- > 3 files changed, 9 insertions(+), 4 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index f8042c21c0..4c9163c2a7 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -1436,7 +1436,7 @@ static void evergreen_set_framebuffer_state(struct > pipe_context *ctx, > struct r600_surface *surf; > struct r600_texture *rtex; > uint32_t i, log_samples; > - > + uint32_t target_mask = 0; > /* Flush TC when changing the framebuffer state, because the only >* client not using TC that can change textures is the framebuffer. >* Other places don't typically have to flush TC. 
> @@ -1463,6 +1463,8 @@ static void evergreen_set_framebuffer_state(struct > pipe_context *ctx, > if (!surf) > continue; > > + target_mask |= (0xf << (i * 4)); > + > rtex = (struct r600_texture*)surf->base.texture; > > r600_context_add_resource_size(ctx, state->cbufs[i]->texture); > @@ -1528,7 +1530,9 @@ static void evergreen_set_framebuffer_state(struct > pipe_context *ctx, > r600_mark_atom_dirty(rctx, >db_misc_state.atom); > } > > - if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) { > + if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs || > + rctx->cb_misc_state.bound_cbufs_target_mask != target_mask) { > + rctx->cb_misc_state.bound_cbufs_target_mask = target_mask; > rctx->cb_misc_state.nr_cbufs = state->nr_cbufs; > r600_mark_atom_dirty(rctx, >cb_misc_state.atom); > } > @@ -2025,7 +2029,7 @@ static void evergreen_emit_cb_misc_state(struct > r600_context *rctx, struct r600_ > { > struct radeon_winsys_cs *cs = rctx->b.gfx.cs; > struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; > - unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1; > + unsigned fb_colormask = a->bound_cbufs_target_mask; > unsigned ps_colormask = a->ps_color_export_mask; > unsigned rat_colormask = evergreen_construct_rat_mask(rctx, a, > a->nr_cbufs); > radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2); > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index 9b94f3654c..9caf3b8512 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -152,6 +152,7 @@ struct r600_cb_misc_state { > unsigned cb_color_control; /* this comes from blend state */ > unsigned blend_colormask; /* 8*4 bits for 8 RGBA colorbuffers */ > unsigned nr_cbufs; > + unsigned bound_cbufs_target_mask; > unsigned nr_ps_color_outputs; > unsigned ps_color_export_mask; > unsigned image_rat_enabled_mask; > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index 
6ff8037d9c..5cf99c18b6 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -1525,7 +1525,7 @@ static void r600_emit_cb_misc_state(struct r600_context > *rctx, struct r600_atom > } > radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, > a->cb_color_control); > } else { > - unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - > 1; > + unsigned fb_colormask = a->bound_cbufs_target_mask; > unsigned ps_colormask = a->ps_color_export_mask; > unsigned multiwrite = a->multiwrite && a->nr_cbufs > 1; > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102032] nir_op_imod is incorrectly implemented as LLVM's srem
https://bugs.freedesktop.org/show_bug.cgi?id=102032 --- Comment #2 from programmerj...@gmail.com --- (In reply to Bas Nieuwenhuizen from comment #1) > I went looking to why there were no good CTS tests for this and found this > in the vulkan spec: > > For the OpSRem and OpSMod instructions, if either operand is negative the > result is undefined. I think this bug should be fixed to support OpenCL. I have not found any references in the OpenCL specs to results of the remainder operator, so I'm guessing it uses the definition eventually derived from C99 section 6.5.5.6 which defines the results for negative operands. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 8/9] r600: work out shader export mask at shader build time
Am 05.02.2018 um 05:29 schrieb Dave Airlie: > From: Dave Airlie> > Since enhanced layouts allows setting specific MRT outputs, we > can get sparse outputs, so we have to calculate the shader > mask earlier. > > Signed-off-by: Dave Airlie > --- > src/gallium/drivers/r600/evergreen_state.c | 3 ++- > src/gallium/drivers/r600/r600_pipe.h | 1 + > src/gallium/drivers/r600/r600_shader.c | 3 +++ > src/gallium/drivers/r600/r600_shader.h | 3 +++ > src/gallium/drivers/r600/r600_state.c| 2 +- > src/gallium/drivers/r600/r600_state_common.c | 1 + > 6 files changed, 11 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index 11e473d604..f8042c21c0 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -2026,7 +2026,7 @@ static void evergreen_emit_cb_misc_state(struct > r600_context *rctx, struct r600_ > struct radeon_winsys_cs *cs = rctx->b.gfx.cs; > struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; > unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1; > - unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * > 4)) - 1; > + unsigned ps_colormask = a->ps_color_export_mask; > unsigned rat_colormask = evergreen_construct_rat_mask(rctx, a, > a->nr_cbufs); > radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2); > radeon_emit(cs, (a->blend_colormask & fb_colormask) | rat_colormask); > /* R_028238_CB_TARGET_MASK */ > @@ -3373,6 +3373,7 @@ void evergreen_update_ps_state(struct pipe_context > *ctx, struct r600_pipe_shader > exports_ps = 2; > } > shader->nr_ps_color_outputs = num_cout; > + shader->ps_color_export_mask = rshader->ps_color_export_mask; > if (ninterp == 0) { > ninterp = 1; > have_perspective = TRUE; > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index 0b772b2599..9b94f3654c 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ 
b/src/gallium/drivers/r600/r600_pipe.h > @@ -153,6 +153,7 @@ struct r600_cb_misc_state { > unsigned blend_colormask; /* 8*4 bits for 8 RGBA colorbuffers */ > unsigned nr_cbufs; > unsigned nr_ps_color_outputs; > + unsigned ps_color_export_mask; > unsigned image_rat_enabled_mask; > unsigned buffer_rat_enabled_mask; > bool multiwrite; > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 893a71b915..9984e783b5 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -3875,6 +3875,7 @@ static int r600_shader_from_tgsi(struct r600_context > *rctx, > output[j].array_base = > shader->output[i].sid; > output[j].type = > V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; > shader->nr_ps_color_exports++; > + shader->ps_color_export_mask |= (0xf << > (shader->output[i].sid * 4)); > if (shader->fs_write_all && > (rscreen->b.chip_class >= EVERGREEN)) { > for (k = 1; k < > max_color_exports; k++) { > j++; > @@ -3890,6 +3891,7 @@ static int r600_shader_from_tgsi(struct r600_context > *rctx, > output[j].op = > CF_OP_EXPORT; > output[j].type = > V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; > > shader->nr_ps_color_exports++; > + > shader->ps_color_export_mask |= (0xf << (j * 4)); > } > } > } else if (shader->output[i].name == > TGSI_SEMANTIC_POSITION) { > @@ -3978,6 +3980,7 @@ static int r600_shader_from_tgsi(struct r600_context > *rctx, > output[j].op = CF_OP_EXPORT; > j++; > shader->nr_ps_color_exports++; > + shader->ps_color_export_mask = 0xf; > } > > noutput = j; > diff --git a/src/gallium/drivers/r600/r600_shader.h > b/src/gallium/drivers/r600/r600_shader.h > index da96688e54..7fca3f455e 100644 > --- a/src/gallium/drivers/r600/r600_shader.h > +++ b/src/gallium/drivers/r600/r600_shader.h > @@ -84,6 +84,7 @@ struct r600_shader { > unsignednr_ps_max_color_exports; > /* Real number of ps color exports compiled in the bytecode */ > unsignednr_ps_color_exports;
Re: [Mesa-dev] [PATCH 6/9] r600/compute: only mark buffer/image state dirty for fragment shaders
Am 05.02.2018 um 05:29 schrieb Dave Airlie: > From: Dave Airlie> > The compute emission path always emits this currently, and emitting > it on the fragment path breaks the blitter. > > This fixes gpu hangs in KHR-GL45.compute_shader.resource-texture > > Signed-off-by: Dave Airlie I have some feeling things would be more robust if an atom must not be emitted in some cases, then the atom emit code should take care of it, rather than relying on not setting it dirty. Albeit since compute does not actually really use the ordinary atom list, maybe there should be a separate atom list really? Seems like dirty handling for compute in general could need some improvement. In any case, I suppose that'll have to do for now, so for 4-6/9 Reviewed-by: Roland Scheidegger > --- > src/gallium/drivers/r600/evergreen_state.c | 6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index 0999cc5de8..11e473d604 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -4062,7 +4062,8 @@ static void evergreen_set_shader_buffers(struct > pipe_context *ctx, > r600_mark_atom_dirty(rctx, >cb_misc_state.atom); > } > > - r600_mark_atom_dirty(rctx, >atom); > + if (shader == PIPE_SHADER_FRAGMENT) > + r600_mark_atom_dirty(rctx, >atom); > } > > static void evergreen_set_shader_images(struct pipe_context *ctx, > @@ -4238,7 +4239,8 @@ static void evergreen_set_shader_images(struct > pipe_context *ctx, > r600_mark_atom_dirty(rctx, >cb_misc_state.atom); > } > > - r600_mark_atom_dirty(rctx, >atom); > + if (shader == PIPE_SHADER_FRAGMENT) > + r600_mark_atom_dirty(rctx, >atom); > } > > static void evergreen_get_pipe_constant_buffer(struct r600_context *rctx, > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Haiku: convert to autotools
On 2018-02-05 18:13, Dylan Baker wrote: Pretty close. I lied, apparently the pthreads fix is in 0.44 https://github.com/mesonbuild/meson/commit/fc547ad05e5a8e650ae5bc2ecc7d40e4dbcc9f0f Here's my branch, but it needs rebase pretty bad, there's also a patch to use shared glapi that I added trying to see if that would get the build working that needs to be removed: https://github.com/dcbaker/mesa/tree/wip/meson-haiku Ok. I went over and got Haiku building with meson with the following change: https://github.com/kallisti5/mesa/commit/e33dfab54d99edacdf1d24c402d29f50818631b3 Any feedback welcome. I need to review your branch now and see if I can make improvements based on your changes. Thanks! -- Alex ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/9] r600: overhaul buffer resource query.
Am 05.02.2018 um 05:29 schrieb Dave Airlie: > From: Dave Airlie> > This cleans up and fixes the previous fix even more. > > Buffers from textures start at max const, > buffers from buffers/images come in from the 168 offset. > > This fixes a bunch of: > KHR-GL45.shader_storage_buffer_object* > > Signed-off-by: Dave Airlie > --- > src/gallium/drivers/r600/r600_shader.c | 15 --- > 1 file changed, 8 insertions(+), 7 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 8c4460a5d5..32f24c071d 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -7007,7 +7007,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx > *ctx, boolean src_requires_l > return 0; > } > > -static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int > offset) > +static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int > offset, int eg_buffer_base) I think it would be nicer if you'd just stick to the offset parameter here. Just add both together in the caller - as far as this function is concerned two offsets don't really make sense. Other than that, and for 1-3/9 Reviewed-by: Roland Scheidegger > { > struct tgsi_full_instruction *inst = > >parse.FullToken.FullInstruction; > int r; > @@ -7033,7 +7033,7 @@ static int r600_do_buffer_txq(struct r600_shader_ctx > *ctx, int reg_idx, int offs > struct r600_bytecode_vtx vtx; > memset(, 0, sizeof(vtx)); > vtx.op = FETCH_OP_GET_BUFFER_RESINFO; > - vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; > + vtx.buffer_id = id + eg_buffer_base; > vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; > vtx.src_gpr = 0; > vtx.mega_fetch_count = 16; /* no idea here really... 
*/ > @@ -7107,7 +7107,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) > if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { > if (ctx->bc->chip_class < EVERGREEN) > ctx->shader->uses_tex_buffers = true; > - return r600_do_buffer_txq(ctx, 1, 0); > + return r600_do_buffer_txq(ctx, 1, 0, > R600_MAX_CONST_BUFFERS); > } > else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { > if (ctx->bc->chip_class < EVERGREEN) > @@ -8821,10 +8821,11 @@ static int tgsi_resq(struct r600_shader_ctx *ctx) > (inst->Src[0].Register.File == TGSI_FILE_IMAGE && > inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) { > if (ctx->bc->chip_class < EVERGREEN) > ctx->shader->uses_tex_buffers = true; > - unsigned offset = 0; > - if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) > - offset += R600_IMAGE_REAL_RESOURCE_OFFSET - > R600_MAX_CONST_BUFFERS + ctx->shader->image_size_const_offset; > - return r600_do_buffer_txq(ctx, 0, offset); > + unsigned eg_buffer_base = 0; > + eg_buffer_base = R600_IMAGE_REAL_RESOURCE_OFFSET; > + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) > + eg_buffer_base += ctx->info.file_count[TGSI_FILE_IMAGE]; > + return r600_do_buffer_txq(ctx, 0, > ctx->shader->image_size_const_offset, eg_buffer_base); > } > > if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY && > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nir: remove the abs call in is_neg_power_of_two
val->i32[swizzle[i]] is guaranteed to have non-positive value before the __is_power_of_two call, so unary minus is equivalent to abs in this case. --- src/compiler/nir/nir_search_helpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 2e3bd137d6..66e1546ae6 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -80,7 +80,7 @@ is_neg_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components, case nir_type_int: if (val->i32[swizzle[i]] > 0) return false; - if (!__is_power_of_two(abs(val->i32[swizzle[i]]))) + if (!__is_power_of_two(-val->i32[swizzle[i]])) return false; break; default: -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking
On Mon, Feb 05, 2018 at 06:05:59PM -0800, Jason Ekstrand wrote: > On Mon, Feb 5, 2018 at 5:41 PM, Nanley Cherywrote: > > > On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote: > > > This commit completely reworks aux tracking. This includes a number of > > > somewhat distinct changes: > > > > > > 1) Since we are no longer fast-clearing multiple slices, we only need > > > to track one fast clear color and one fast clear type. > > > > > > 2) We store two bits for fast clear instead of one to let us > > > distinguish between zero and non-zero fast clear colors. This is > > > needed so that we can do full resolves when transitioning to > > > PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear > > > values in all sorts of places wouldn't normally. > > > > > > 3) We now track compression state as a boolean separate from fast clear > > > type and this is tracked on a per-slice granularity. > > > > > > The previous scheme had some issues when it came to individual slices of > > > a multi-LOD images. In particular, we only tracked "needs resolve" > > > per-LOD but you could do a vkCmdPipelineBarrier that would only resolve > > > a portion of the image and would set "needs resolve" to false anyway. > > > Also, any transition from an undefined layout would reset the clear > > > color for the entire LOD regardless of whether or not there was some > > > clear color on some other slice. > > > > > > As far as full/partial resolves go, he assumptions of the previous > > > scheme held because the one case where we do need a full resolve when > > > CCS_E is enabled is for window-system images. Since we only ever > > > allowed X-tiled window-system images, CCS was entirely disabled on gen9+ > > > and we never got CCS_E. With the advent of Y-tiled window-system > > > buffers, we now need to properly support doing a full resolve of images > > > marked CCS_E. 
> > > --- > > > src/intel/vulkan/anv_blorp.c | 3 +- > > > src/intel/vulkan/anv_image.c | 96 ++- > > > src/intel/vulkan/anv_private.h | 53 +++--- > > > src/intel/vulkan/genX_cmd_buffer.c | 340 +++--- > > --- > > > 4 files changed, 331 insertions(+), 161 deletions(-) > > > > > > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c > > > index 3698543..594b0d8 100644 > > > --- a/src/intel/vulkan/anv_blorp.c > > > +++ b/src/intel/vulkan/anv_blorp.c > > > @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, > > > * particular value and don't care about format or clear value. > > > */ > > >const struct anv_address clear_color_addr = > > > - anv_image_get_clear_color_addr(cmd_buffer->device, image, > > > -aspect, level); > > > + anv_image_get_clear_color_addr(cmd_buffer->device, image, > > aspect); > > >surf.clear_color_addr = anv_to_blorp_address(clear_color_addr); > > > } > > > > > > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c > > > index 94b9ecb..d5f8dcf 100644 > > > --- a/src/intel/vulkan/anv_image.c > > > +++ b/src/intel/vulkan/anv_image.c > > > @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct > > gen_device_info *devinfo, > > > * fast-clear values in non-trivial cases (e.g., outside of a render > > pass in > > > * which a fast clear has occurred). > > > * > > > - * For the purpose of discoverability, the algorithm used to manage > > this buffer > > > - * is described here. A clear value in this buffer is updated when a > > fast clear > > > - * is performed on a subresource. One of two synchronization operations > > is > > > - * performed in order for a following memory access to use the > > fast-clear > > > - * value: > > > - *a. Copy the value from the buffer to the surface state object > > used for > > > - * reading. This is done implicitly when the value is the clear > > value > > > - * predetermined to be the default in other surface state > > objects. 
This > > > - * is currently only done explicitly for the operation below. > > > - *b. Do (a) and use the surface state object to resolve the > > subresource. > > > - * This is only done during layout transitions for decent > > performance. > > > + * In order to avoid having multiple clear colors for a single plane of > > an > > > + * image (hence a single RENDER_SURFACE_STATE), we only allow > > fast-clears on > > > + * the first slice (level 0, layer 0). At the time of our testing (Jan > > 17, > > > + * 2018), there were known applications which would benefit from > > fast-clearing > > > + * more than just the first slice. > > > * > > > - * With the above scheme, we can fast-clear whenever the hardware > > allows except > > > - * for two cases in which synchronization becomes impossible or > > undesirable: > > > - ** The subresource is in the GENERAL layout and is
[Mesa-dev] [PATCH v3 20/24] anv/cmd_buffer: Rework aux tracking
This commit completely reworks aux tracking. This includes a number of somewhat distinct changes: 1) Since we are no longer fast-clearing multiple slices, we only need to track one fast clear color and one fast clear type. 2) We store two bits for fast clear instead of one to let us distinguish between zero and non-zero fast clear colors. This is needed so that we can do full resolves when transitioning to PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear values in all sorts of places we wouldn't normally. 3) We now track compression state as a boolean separate from fast clear type and this is tracked on a per-slice granularity. The previous scheme had some issues when it came to individual slices of a multi-LOD image. In particular, we only tracked "needs resolve" per-LOD but you could do a vkCmdPipelineBarrier that would only resolve a portion of the image and would set "needs resolve" to false anyway. Also, any transition from an undefined layout would reset the clear color for the entire LOD regardless of whether or not there was some clear color on some other slice. As far as full/partial resolves go, the assumptions of the previous scheme held because the one case where we do need a full resolve when CCS_E is enabled is for window-system images. Since we only ever allowed X-tiled window-system images, CCS was entirely disabled on gen9+ and we never got CCS_E. With the advent of Y-tiled window-system buffers, we now need to properly support doing a full resolve of images marked CCS_E. 
v2 (Jason Ekstrand): - Fix an bug in the compressed flag offset calculation - Treat 3D images as multi-slice for the purposes of resolve tracking Reviewed-by: Topi Pohjolainen--- src/intel/vulkan/anv_blorp.c | 3 +- src/intel/vulkan/anv_image.c | 100 ++- src/intel/vulkan/anv_private.h | 60 --- src/intel/vulkan/genX_cmd_buffer.c | 340 +++-- 4 files changed, 345 insertions(+), 158 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 497ae6f..fc3b717 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1758,8 +1758,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, * particular value and don't care about format or clear value. */ const struct anv_address clear_color_addr = - anv_image_get_clear_color_addr(cmd_buffer->device, image, -aspect, level); + anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); surf.clear_color_addr = anv_to_blorp_address(clear_color_addr); } diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 11942d0..011e952 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct gen_device_info *devinfo, * fast-clear values in non-trivial cases (e.g., outside of a render pass in * which a fast clear has occurred). * - * For the purpose of discoverability, the algorithm used to manage this buffer - * is described here. A clear value in this buffer is updated when a fast clear - * is performed on a subresource. One of two synchronization operations is - * performed in order for a following memory access to use the fast-clear - * value: - *a. Copy the value from the buffer to the surface state object used for - * reading. This is done implicitly when the value is the clear value - * predetermined to be the default in other surface state objects. This - * is currently only done explicitly for the operation below. - *b. 
Do (a) and use the surface state object to resolve the subresource. - * This is only done during layout transitions for decent performance. + * In order to avoid having multiple clear colors for a single plane of an + * image (hence a single RENDER_SURFACE_STATE), we only allow fast-clears on + * the first slice (level 0, layer 0). At the time of our testing (Jan 17, + * 2018), there were no known applications which would benefit from fast- + * clearing more than just the first slice. * - * With the above scheme, we can fast-clear whenever the hardware allows except - * for two cases in which synchronization becomes impossible or undesirable: - ** The subresource is in the GENERAL layout and is cleared to a value - * other than the special default value. + * The fast clear portion of the image is laid out in the following order: * - * Performing a synchronization operation in order to read from the - * subresource is undesirable in this case. Firstly, b) is not an option - * because a layout transition isn't required between a write and read of - * an image in the GENERAL layout. Secondly, it's undesirable to do a) - * explicitly because it would require large infrastructural changes. The -
Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking
On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote: > This commit completely reworks aux tracking. This includes a number of > somewhat distinct changes: > > 1) Since we are no longer fast-clearing multiple slices, we only need > to track one fast clear color and one fast clear type. > > 2) We store two bits for fast clear instead of one to let us > distinguish between zero and non-zero fast clear colors. This is > needed so that we can do full resolves when transitioning to > PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear > values in all sorts of places wouldn't normally. > > 3) We now track compression state as a boolean separate from fast clear > type and this is tracked on a per-slice granularity. > > The previous scheme had some issues when it came to individual slices of > a multi-LOD images. In particular, we only tracked "needs resolve" > per-LOD but you could do a vkCmdPipelineBarrier that would only resolve > a portion of the image and would set "needs resolve" to false anyway. > Also, any transition from an undefined layout would reset the clear > color for the entire LOD regardless of whether or not there was some > clear color on some other slice. > > As far as full/partial resolves go, he assumptions of the previous > scheme held because the one case where we do need a full resolve when > CCS_E is enabled is for window-system images. Since we only ever > allowed X-tiled window-system images, CCS was entirely disabled on gen9+ > and we never got CCS_E. With the advent of Y-tiled window-system > buffers, we now need to properly support doing a full resolve of images > marked CCS_E. > --- > src/intel/vulkan/anv_blorp.c | 3 +- > src/intel/vulkan/anv_image.c | 96 ++- > src/intel/vulkan/anv_private.h | 53 +++--- > src/intel/vulkan/genX_cmd_buffer.c | 340 > +++-- > 4 files changed, 331 insertions(+), 161 deletions(-) > Could you send out another rev with the 3D surface changes squashed in? 
I was almost about to send review feedback on a bug you've already fixed with the add-on patch. Thanks, Nanley > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c > index 3698543..594b0d8 100644 > --- a/src/intel/vulkan/anv_blorp.c > +++ b/src/intel/vulkan/anv_blorp.c > @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, > * particular value and don't care about format or clear value. > */ >const struct anv_address clear_color_addr = > - anv_image_get_clear_color_addr(cmd_buffer->device, image, > -aspect, level); > + anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); >surf.clear_color_addr = anv_to_blorp_address(clear_color_addr); > } > > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c > index 94b9ecb..d5f8dcf 100644 > --- a/src/intel/vulkan/anv_image.c > +++ b/src/intel/vulkan/anv_image.c > @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct > gen_device_info *devinfo, > * fast-clear values in non-trivial cases (e.g., outside of a render pass in > * which a fast clear has occurred). > * > - * For the purpose of discoverability, the algorithm used to manage this > buffer > - * is described here. A clear value in this buffer is updated when a fast > clear > - * is performed on a subresource. One of two synchronization operations is > - * performed in order for a following memory access to use the fast-clear > - * value: > - *a. Copy the value from the buffer to the surface state object used for > - * reading. This is done implicitly when the value is the clear value > - * predetermined to be the default in other surface state objects. This > - * is currently only done explicitly for the operation below. > - *b. Do (a) and use the surface state object to resolve the subresource. > - * This is only done during layout transitions for decent performance. 
> + * In order to avoid having multiple clear colors for a single plane of an > + * image (hence a single RENDER_SURFACE_STATE), we only allow fast-clears on > + * the first slice (level 0, layer 0). At the time of our testing (Jan 17, > + * 2018), there were known applications which would benefit from > fast-clearing > + * more than just the first slice. > * > - * With the above scheme, we can fast-clear whenever the hardware allows > except > - * for two cases in which synchronization becomes impossible or undesirable: > - ** The subresource is in the GENERAL layout and is cleared to a value > - * other than the special default value. > + * The fast clear portion of the image is laid out in the following order: > * > - * Performing a synchronization operation in order to read from the > - * subresource is undesirable in this case. Firstly, b) is not an option > - * because a
Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking
On Mon, Feb 5, 2018 at 5:41 PM, Nanley Cherywrote: > On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote: > > This commit completely reworks aux tracking. This includes a number of > > somewhat distinct changes: > > > > 1) Since we are no longer fast-clearing multiple slices, we only need > > to track one fast clear color and one fast clear type. > > > > 2) We store two bits for fast clear instead of one to let us > > distinguish between zero and non-zero fast clear colors. This is > > needed so that we can do full resolves when transitioning to > > PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear > > values in all sorts of places wouldn't normally. > > > > 3) We now track compression state as a boolean separate from fast clear > > type and this is tracked on a per-slice granularity. > > > > The previous scheme had some issues when it came to individual slices of > > a multi-LOD images. In particular, we only tracked "needs resolve" > > per-LOD but you could do a vkCmdPipelineBarrier that would only resolve > > a portion of the image and would set "needs resolve" to false anyway. > > Also, any transition from an undefined layout would reset the clear > > color for the entire LOD regardless of whether or not there was some > > clear color on some other slice. > > > > As far as full/partial resolves go, he assumptions of the previous > > scheme held because the one case where we do need a full resolve when > > CCS_E is enabled is for window-system images. Since we only ever > > allowed X-tiled window-system images, CCS was entirely disabled on gen9+ > > and we never got CCS_E. With the advent of Y-tiled window-system > > buffers, we now need to properly support doing a full resolve of images > > marked CCS_E. 
> > --- > > src/intel/vulkan/anv_blorp.c | 3 +- > > src/intel/vulkan/anv_image.c | 96 ++- > > src/intel/vulkan/anv_private.h | 53 +++--- > > src/intel/vulkan/genX_cmd_buffer.c | 340 +++--- > --- > > 4 files changed, 331 insertions(+), 161 deletions(-) > > > > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c > > index 3698543..594b0d8 100644 > > --- a/src/intel/vulkan/anv_blorp.c > > +++ b/src/intel/vulkan/anv_blorp.c > > @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, > > * particular value and don't care about format or clear value. > > */ > >const struct anv_address clear_color_addr = > > - anv_image_get_clear_color_addr(cmd_buffer->device, image, > > -aspect, level); > > + anv_image_get_clear_color_addr(cmd_buffer->device, image, > aspect); > >surf.clear_color_addr = anv_to_blorp_address(clear_color_addr); > > } > > > > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c > > index 94b9ecb..d5f8dcf 100644 > > --- a/src/intel/vulkan/anv_image.c > > +++ b/src/intel/vulkan/anv_image.c > > @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct > gen_device_info *devinfo, > > * fast-clear values in non-trivial cases (e.g., outside of a render > pass in > > * which a fast clear has occurred). > > * > > - * For the purpose of discoverability, the algorithm used to manage > this buffer > > - * is described here. A clear value in this buffer is updated when a > fast clear > > - * is performed on a subresource. One of two synchronization operations > is > > - * performed in order for a following memory access to use the > fast-clear > > - * value: > > - *a. Copy the value from the buffer to the surface state object > used for > > - * reading. This is done implicitly when the value is the clear > value > > - * predetermined to be the default in other surface state > objects. This > > - * is currently only done explicitly for the operation below. > > - *b. 
Do (a) and use the surface state object to resolve the > subresource. > > - * This is only done during layout transitions for decent > performance. > > + * In order to avoid having multiple clear colors for a single plane of > an > > + * image (hence a single RENDER_SURFACE_STATE), we only allow > fast-clears on > > + * the first slice (level 0, layer 0). At the time of our testing (Jan > 17, > > + * 2018), there were known applications which would benefit from > fast-clearing > > + * more than just the first slice. > > * > > - * With the above scheme, we can fast-clear whenever the hardware > allows except > > - * for two cases in which synchronization becomes impossible or > undesirable: > > - ** The subresource is in the GENERAL layout and is cleared to a > value > > - * other than the special default value. > > + * The fast clear portion of the image is laid out in the following > order: > > * > > - * Performing a synchronization operation in order to read from the > > - * subresource
Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking
On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote: > This commit completely reworks aux tracking. This includes a number of > somewhat distinct changes: > > 1) Since we are no longer fast-clearing multiple slices, we only need > to track one fast clear color and one fast clear type. > > 2) We store two bits for fast clear instead of one to let us > distinguish between zero and non-zero fast clear colors. This is > needed so that we can do full resolves when transitioning to > PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear > values in all sorts of places wouldn't normally. > > 3) We now track compression state as a boolean separate from fast clear > type and this is tracked on a per-slice granularity. > > The previous scheme had some issues when it came to individual slices of > a multi-LOD images. In particular, we only tracked "needs resolve" > per-LOD but you could do a vkCmdPipelineBarrier that would only resolve > a portion of the image and would set "needs resolve" to false anyway. > Also, any transition from an undefined layout would reset the clear > color for the entire LOD regardless of whether or not there was some > clear color on some other slice. > > As far as full/partial resolves go, he assumptions of the previous > scheme held because the one case where we do need a full resolve when > CCS_E is enabled is for window-system images. Since we only ever > allowed X-tiled window-system images, CCS was entirely disabled on gen9+ > and we never got CCS_E. With the advent of Y-tiled window-system > buffers, we now need to properly support doing a full resolve of images > marked CCS_E. 
> --- > src/intel/vulkan/anv_blorp.c | 3 +- > src/intel/vulkan/anv_image.c | 96 ++- > src/intel/vulkan/anv_private.h | 53 +++--- > src/intel/vulkan/genX_cmd_buffer.c | 340 > +++-- > 4 files changed, 331 insertions(+), 161 deletions(-) > > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c > index 3698543..594b0d8 100644 > --- a/src/intel/vulkan/anv_blorp.c > +++ b/src/intel/vulkan/anv_blorp.c > @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, > * particular value and don't care about format or clear value. > */ >const struct anv_address clear_color_addr = > - anv_image_get_clear_color_addr(cmd_buffer->device, image, > -aspect, level); > + anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); >surf.clear_color_addr = anv_to_blorp_address(clear_color_addr); > } > > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c > index 94b9ecb..d5f8dcf 100644 > --- a/src/intel/vulkan/anv_image.c > +++ b/src/intel/vulkan/anv_image.c > @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct > gen_device_info *devinfo, > * fast-clear values in non-trivial cases (e.g., outside of a render pass in > * which a fast clear has occurred). > * > - * For the purpose of discoverability, the algorithm used to manage this > buffer > - * is described here. A clear value in this buffer is updated when a fast > clear > - * is performed on a subresource. One of two synchronization operations is > - * performed in order for a following memory access to use the fast-clear > - * value: > - *a. Copy the value from the buffer to the surface state object used for > - * reading. This is done implicitly when the value is the clear value > - * predetermined to be the default in other surface state objects. This > - * is currently only done explicitly for the operation below. > - *b. Do (a) and use the surface state object to resolve the subresource. 
> - * This is only done during layout transitions for decent performance. > + * In order to avoid having multiple clear colors for a single plane of an > + * image (hence a single RENDER_SURFACE_STATE), we only allow fast-clears on > + * the first slice (level 0, layer 0). At the time of our testing (Jan 17, > + * 2018), there were known applications which would benefit from > fast-clearing > + * more than just the first slice. > * > - * With the above scheme, we can fast-clear whenever the hardware allows > except > - * for two cases in which synchronization becomes impossible or undesirable: > - ** The subresource is in the GENERAL layout and is cleared to a value > - * other than the special default value. > + * The fast clear portion of the image is laid out in the following order: > * > - * Performing a synchronization operation in order to read from the > - * subresource is undesirable in this case. Firstly, b) is not an option > - * because a layout transition isn't required between a write and read > of > - * an image in the GENERAL layout. Secondly, it's undesirable to do a) > - * explicitly because it would
Re: [Mesa-dev] [PATCH] Haiku: convert to autotools
On 2018-02-05 18:13, Dylan Baker wrote: Quoting kallisti5 (2018-02-05 15:36:06) On 2018-02-05 16:14, kallisti5 wrote: > On 2018-02-05 15:39, Dylan Baker wrote: >> Quoting kallisti5 (2018-02-05 12:58:30) >>> On 2017-10-24 11:47, Emil Velikov wrote: >>> > Hi Jerome, >>> > >>> > On 23 October 2017 at 16:58, Jerome Duval>>> > wrote: >>> >> * configure.ac: >>> >> -pthread is not available on Haiku. >>> >> Haiku doesn't require --enable-dri >>> >> build hgl on Haiku >>> >> * egl/Makefile.am: define backendfiles for Haiku >>> >> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and >>> >> targets/haiku-softpipe on Haiku. >>> >> * src/gallium/targets/haiku-softpipe: add Makefile.am >>> >> * src/gallium/state_trackers/hgl: add Makefile.am >>> >> * winsys/sw/hgl: add Makefile.am >>> >> * src/hgl/Makefile.am: add Makefile.am >>> >> --- >>> > Thanks for the patch. I think Eric has a point regarding splitting this >>> > up. >>> > Here is one way to handle it: >>> > - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku >>> > - 2 - src/egl >>> > - 3 - src/hgl >>> > - 4 misc fixes (the SoftwareRenderer.cpp hunk?) >>> > - 5 toggle - configure.ac + src/Makefile.am >>> >>> Hm, it looks like Jerome never got back to work on these changes... >>> let >>> me try to >>> pick up the ball and run with it. >>> >>> > Couple of small suggestions: >>> > - keep all the sources and headers in the sources lists in >>> > Makefile.sources >>> > - how do you guys manage pthreads - please mention that in the commit >>> > message. >>> > >>> > If I'm reading this correctly, you strip out -pthread and there's no >>> > pthread-stubs on Haiku. >>> >>> Haiku (and BeOS for that matter) has pthread support built into its >>> core >>> libroot.so. >>> >>> No need for -lpthread, all applications can assume its presence. >>> Things >>> that link -lpthread actually fail due to a non-existant libpthread... 
>>> *however* as i'm typing this i'm being told we recently implemented a >>> dummy static libpthread.a to try and appease assumptions about >>> -lpthread >>> existence so i'll remove the pthread checks :-) >>> >>> -- Alex >> >> Hi Alex, >> >> I have a branch for building haiku with meson, when I was trying to >> compile >> neither the scons build nor the autotools build seemed to compile on a >> Haiku VM >> instance (x86_64), that was a few months ago though, so maybe its >> fixed. >> >> Our plan is to remove autotools from mesa, probably this year. I'm >> thinking if >> things look pretty good through the 18.0 release cycle I'll probably >> propose >> marking autotools as deprecated for 18.1 and propose removal in 18.2. > > Ah. crap. I just got autoconfig working :-). Historically I have only > used > SCons for our builds. I always preferred the SCons build since > autotools always > ends up looking like spaghetti. Here is what our current build does: > > https://github.com/haikuports/haikuports/blob/master/sys-libs/mesa/mesa-17.1.4.recipe#L52 > > It looks like Jerome hacked in a patch for autotools... but i've heard > some reports > of instability with the resulting artifacts. > >> I'm not going to block you guys using autotools or NAK anything, I >> just want >> you to be aware that we're trying to consolidate down to just meson >> and >> android.mk files. I can respin the haiku patches and CC you if you're >> interested in >> looking at them. > > If Meson is the future, i'm definitely down helping (or even taking > over) that branch > if it is just incomplete Haiku work. > > I'm going to try and do better maintenance on Haiku Mesa in 2018. I've > been only around > minimally in 2017 am a little out of date. > >> You might also want to see if you guys can update your meson, at least >> last time >> I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that >> -pthread >> and -lpthread are never added by meson. 
> > I just installed meson on Haiku and we are currently at 0.43.0 I took a quick crack at meson on Haiku. Getting stuck with a -pthread getting injected somewhere around glapi gen. Lets see if my modifications are close to yours sight unseen :-) https://gist.github.com/kallisti5/eb43162dd4c9e61b5740444d20955118 -- Alex Pretty close. I lied, apparently the pthreads fix is in 0.44 https://github.com/mesonbuild/meson/commit/fc547ad05e5a8e650ae5bc2ecc7d40e4dbcc9f0f Here's my branch, but it needs rebase pretty bad, there's also a patch to use shared glapi that I added trying to see if that would get the build working that needs to be removed: https://github.com/dcbaker/mesa/tree/wip/meson-haiku I went ahead and made a recipe for 0.44.0. Our repos should have it soon. I can confirm the pthread issue is solved via that change. Do you mind if I take the work in your branch and try to rebase/complete it and upstream it? If we can get Meson working, I think Haiku is one of the last SCons consumers... maybe everyone will be on-board dropping
Re: [Mesa-dev] [PATCH] Haiku: convert to autotools
Quoting kallisti5 (2018-02-05 15:36:06) > On 2018-02-05 16:14, kallisti5 wrote: > > On 2018-02-05 15:39, Dylan Baker wrote: > >> Quoting kallisti5 (2018-02-05 12:58:30) > >>> On 2017-10-24 11:47, Emil Velikov wrote: > >>> > Hi Jerome, > >>> > > >>> > On 23 October 2017 at 16:58, Jerome Duval> >>> > wrote: > >>> >> * configure.ac: > >>> >> -pthread is not available on Haiku. > >>> >> Haiku doesn't require --enable-dri > >>> >> build hgl on Haiku > >>> >> * egl/Makefile.am: define backendfiles for Haiku > >>> >> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and > >>> >> targets/haiku-softpipe on Haiku. > >>> >> * src/gallium/targets/haiku-softpipe: add Makefile.am > >>> >> * src/gallium/state_trackers/hgl: add Makefile.am > >>> >> * winsys/sw/hgl: add Makefile.am > >>> >> * src/hgl/Makefile.am: add Makefile.am > >>> >> --- > >>> > Thanks for the patch. I think Eric has a point regarding splitting this > >>> > up. > >>> > Here is one way to handle it: > >>> > - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku > >>> > - 2 - src/egl > >>> > - 3 - src/hgl > >>> > - 4 misc fixes (the SoftwareRenderer.cpp hunk?) > >>> > - 5 toggle - configure.ac + src/Makefile.am > >>> > >>> Hm, it looks like Jerome never got back to work on these changes... > >>> let > >>> me try to > >>> pick up the ball and run with it. > >>> > >>> > Couple of small suggestions: > >>> > - keep all the sources and headers in the sources lists in > >>> > Makefile.sources > >>> > - how do you guys manage pthreads - please mention that in the commit > >>> > message. > >>> > > >>> > If I'm reading this correctly, you strip out -pthread and there's no > >>> > pthread-stubs on Haiku. > >>> > >>> Haiku (and BeOS for that matter) has pthread support built into its > >>> core > >>> libroot.so. > >>> > >>> No need for -lpthread, all applications can assume its presence. > >>> Things > >>> that link -lpthread actually fail due to a non-existant libpthread... 
> >>> *however* as i'm typing this i'm being told we recently implemented a > >>> dummy static libpthread.a to try and appease assumptions about > >>> -lpthread > >>> existence so i'll remove the pthread checks :-) > >>> > >>> -- Alex > >> > >> Hi Alex, > >> > >> I have a branch for building haiku with meson, when I was trying to > >> compile > >> neither the scons build nor the autotools build seemed to compile on a > >> Haiku VM > >> instance (x86_64), that was a few months ago though, so maybe its > >> fixed. > >> > >> Our plan is to remove autotools from mesa, probably this year. I'm > >> thinking if > >> things look pretty good through the 18.0 release cycle I'll probably > >> propose > >> marking autotools as deprecated for 18.1 and propose removal in 18.2. > > > > Ah. crap. I just got autoconfig working :-). Historically I have only > > used > > SCons for our builds. I always preferred the SCons build since > > autotools always > > ends up looking like spaghetti. Here is what our current build does: > > > > https://github.com/haikuports/haikuports/blob/master/sys-libs/mesa/mesa-17.1.4.recipe#L52 > > > > It looks like Jerome hacked in a patch for autotools... but i've heard > > some reports > > of instability with the resulting artifacts. > > > >> I'm not going to block you guys using autotools or NAK anything, I > >> just want > >> you to be aware that we're trying to consolidate down to just meson > >> and > >> android.mk files. I can respin the haiku patches and CC you if you're > >> interested in > >> looking at them. > > > > If Meson is the future, i'm definitely down helping (or even taking > > over) that branch > > if it is just incomplete Haiku work. > > > > I'm going to try and do better maintenance on Haiku Mesa in 2018. I've > > been only around > > minimally in 2017 am a little out of date. 
> > > >> You might also want to see if you guys can update your meson, at least > >> last time > >> I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that > >> -pthread > >> and -lpthread are never added by meson. > > > > I just installed meson on Haiku and we are currently at 0.43.0 > > I took a quick crack at meson on Haiku. Getting stuck with a -pthread > getting injected > somewhere around glapi gen. > > Lets see if my modifications are close to yours sight unseen :-) > > https://gist.github.com/kallisti5/eb43162dd4c9e61b5740444d20955118 > > -- Alex Pretty close. I lied, apparently the pthreads fix is in 0.44 https://github.com/mesonbuild/meson/commit/fc547ad05e5a8e650ae5bc2ecc7d40e4dbcc9f0f Here's my branch, but it needs rebase pretty bad, there's also a patch to use shared glapi that I added trying to see if that would get the build working that needs to be removed: https://github.com/dcbaker/mesa/tree/wip/meson-haiku Dylan signature.asc Description: signature ___ mesa-dev mailing list
[Mesa-dev] [PATCH v2] radeonsi/nir: always set input_usage_mask as using all components
This fixes a regression for now, in the future we should gather the used components properly. V2: just set for VS and correctly handle doubles Fixes: be973ed21f6e "radeonsi: load the right number of components for VS inputs and TBOs" Cc: Marek Olšák--- src/gallium/drivers/radeonsi/si_shader_nir.c | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 8abffdb8fc..06d9354363 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -304,14 +304,22 @@ void si_nir_scan_shader(const struct nir_shader *nir, unsigned attrib_count = glsl_count_attribute_slots(type, nir->info.stage == MESA_SHADER_VERTEX); + i = variable->data.driver_location; + /* Vertex shader inputs don't have semantics. The state * tracker has already mapped them to attributes via * variable->data.driver_location. */ if (nir->info.stage == MESA_SHADER_VERTEX) { - if (glsl_type_is_dual_slot(variable->type)) + /* TODO: gather the actual input useage and remove this. */ + info->input_usage_mask[i] = TGSI_WRITEMASK_XYZW; + + if (glsl_type_is_dual_slot(variable->type)) { num_inputs += 2; - else + + /* TODO: gather the actual input useage and remove this. */ + info->input_usage_mask[i+1] = TGSI_WRITEMASK_XYZW; + } else num_inputs++; continue; } @@ -327,8 +335,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, continue; } - i = variable->data.driver_location; - for (unsigned j = 0; j < attrib_count; j++, i++) { if (processed_inputs & ((uint64_t)1 << i)) -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/6] android: Move gralloc handle struct to libdrm
On Mon, Jan 29, 2018 at 11:37 AM, Robert Fosswrote: > This struct is used in mesa and drm_hwcomposer. > Versions of if have been implemented in several grallocs: > drm_gralloc, gbm_gralloc, minigbm and intel-minigbm. > > Other than the 1:1 move of the struct a new generic name > has been chosen and variables have had comments added to them. > > Signed-off-by: Robert Foss > --- > Changes since v1: > Suggested by Rob Herring: > - Fixed copyright statement > - Moved FDs to be first in handle > - Initialize native_handle_t using native_handle_create() > > Android.mk | 8 +++- > Makefile.sources | 3 ++ > android/gralloc_handle.h | 102 > +++ > 3 files changed, 111 insertions(+), 2 deletions(-) > create mode 100644 android/gralloc_handle.h > > diff --git a/Android.mk b/Android.mk > index 292be2360263..8611c5e316d8 100644 > --- a/Android.mk > +++ b/Android.mk > @@ -28,7 +28,7 @@ LIBDRM_TOP := $(LOCAL_PATH) > > include $(CLEAR_VARS) > > -# Import variables LIBDRM_{,H_,INCLUDE_H_,INCLUDE_VMWGFX_H_}FILES > +# Import variables > LIBDRM_{,H,INCLUDE_H,INCLUDE_ANDROID_H,INCLUDE_VMWGFX_H}_FILES > include $(LOCAL_PATH)/Makefile.sources > > #static library for the device (recovery) > @@ -38,7 +38,8 @@ LOCAL_MODULE := libdrm > LOCAL_SRC_FILES := $(LIBDRM_FILES) > LOCAL_EXPORT_C_INCLUDE_DIRS := \ > $(LOCAL_PATH) \ > - $(LOCAL_PATH)/include/drm > + $(LOCAL_PATH)/include/drm \ > + $(LOCAL_PATH)/android > > LOCAL_C_INCLUDES := \ > $(LOCAL_PATH)/include/drm > @@ -54,6 +55,9 @@ LOCAL_SRC_FILES := $(LIBDRM_FILES) > LOCAL_EXPORT_C_INCLUDE_DIRS := \ > $(LOCAL_PATH)/include/drm > > +LOCAL_SHARED_LIBRARIES := \ > + libcutils > + > LOCAL_C_INCLUDES := \ > $(LOCAL_PATH)/include/drm > > diff --git a/Makefile.sources b/Makefile.sources > index 10aa1d0f4b6e..1f8372bca183 100644 > --- a/Makefile.sources > +++ b/Makefile.sources > @@ -37,5 +37,8 @@ LIBDRM_INCLUDE_H_FILES := \ > include/drm/via_drm.h \ > include/drm/virtgpu_drm.h > > +LIBDRM_INCLUDE_ANDROID_H_FILES := \ > + android/gralloc_handle.h > + 
> LIBDRM_INCLUDE_VMWGFX_H_FILES := \ > include/drm/vmwgfx_drm.h > diff --git a/android/gralloc_handle.h b/android/gralloc_handle.h > new file mode 100644 > index ..770ee7adb4b5 > --- /dev/null > +++ b/android/gralloc_handle.h > @@ -0,0 +1,102 @@ > +/* > + * Copyright (C) 2018 Robert Foss Sorry, if I wasn't clear, but this obviously comes from gralloc_drm_handle.h. You should maintain those copyrights (and make sure we aren't changing the license). > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > THE > + * SOFTWARE. 
> + * > + * Authors: > + *Robert Foss > + */ > + > +#ifndef __ANDROID_GRALLOC_HANDLE_H__ > +#define __ANDROID_GRALLOC_HANDLE_H__ > + > +#include > + > +/* support users of drm_gralloc/gbm_gralloc */ > +#define gralloc_gbm_handle_t gralloc_handle_t > +#define gralloc_drm_handle_t gralloc_handle_t > + > +struct gralloc_handle_t { > + native_handle_t base; > + > + /* dma-buf file descriptor > +* Must be located first since, native_handle_t is allocated > +* using native_handle_create(), which allocates space for > +* sizeof(native_handle_t) + sizeof(int) * (numFds + numInts) > +* numFds = GRALLOC_HANDLE_NUM_FDS > +* numInts = GRALLOC_HANDLE_NUM_INTS > +* Where numFds represents the number of FDs and > +* numInts represents the space needed for the > +* remainder of this struct. > +* And the FDs are expected to be found first following > +* native_handle_t. > +*/ > + int prime_fd; > + > + int magic; /* differentiate between allocator impls */
[Mesa-dev] [PATCH] i965: Do null pointer check before dereferencing vue_prog_data
Signed-off-by: Anuj Phogat--- src/mesa/drivers/dri/i965/genX_state_upload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index aa4d64d08e..67fb328dbc 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -3966,7 +3966,8 @@ genX(upload_ds_state)(struct brw_context *brw) tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; #if GEN_GEN >= 8 -if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8) +if (vue_prog_data && +vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8) ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; ds.UserClipDistanceCullTestEnableBitmask = vue_prog_data->cull_distance_mask; -- 2.13.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/isl: Add assertion for aux surface pitch
I don't have a test case hitting this assert. But, it's nice to have an assert checking the limit. Signed-off-by: Anuj Phogat--- src/intel/isl/isl_surface_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index bfb27fa4a4..afd4b80ddb 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -566,6 +566,8 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, s.AuxiliarySurfaceBaseAddress = info->aux_address; s.AuxiliarySurfacePitch = pitch_in_tiles - 1; + assert(s.AuxiliarySurfacePitch <= 511); + #if GEN_GEN >= 8 assert(GEN_GEN >= 9 || info->aux_usage != ISL_AUX_USAGE_CCS_E); /* Auxiliary surfaces in ISL have compressed formats but the hardware -- 2.13.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/3] Fix and tweak to the VAO v2
On 02/05/2018 03:19 PM, mathias.froehl...@gmx.net wrote: From: Mathias Fröhlich Hi Brian, Actually after incorporating your review requests to set gl_vertex_array::Size and gl_vertex_array::Ptr to zero, radeonsi started to assert in Bitmap/CopyPixels/DrawPixels. Which assertion, exactly? And what test triggers it? I'd like to take a close look with the llvmpipe/svga drivers just so I understand what's happening. -Brian So, here is the updated series including the requested changes. And additionally for review the change to fix the mentioned asserts in several piglit tests. Please review! best Mathias Mathias Fröhlich (3): mesa: Fix VAO buffer object tracking. mesa: Mute arrays for Bitmap/CopyPixels/DrawPixels callbacks. mesa: Only update enabled VAO gl_vertex_array entries. src/mesa/main/drawpix.c | 10 ++ src/mesa/main/varray.c | 10 ++ src/mesa/main/varray.h | 29 ++--- 3 files changed, 34 insertions(+), 15 deletions(-) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] util: remove redundant check for the __clang__ macro
I'm tempted to say the other places which only check for __GNUC__ should also check for __clang__, just to be more obvious (or does everyone know that __clang__ implies __GNUC__?). Maybe others have an opinion. Anyway, the location in question below seems to be the first place this appears in the file so I'd suggest putting a simple comment there, like /* Note: Clang also sets __GNUC__ (see other cases below) */ -Brian On 02/05/2018 02:41 PM, Vlad Golovkin wrote: In this file there are similar cases with macros PUBLIC, USED and ATTRIBUTE_NOINLINE, before defining which as __attribute__(...), code only checks for __GNUC__. Should I add comments there as well? 2018-02-05 22:51 GMT+02:00 Brian Paul: On 02/05/2018 01:44 PM, Vlad Golovkin wrote: Clang defines __GNUC__ macro, so one doesn't need to check __clang__ macro in this particular case. Perhaps mention that in a comment below so there's no confusion. -Brian --- src/util/macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/macros.h b/src/util/macros.h index 432d513930..d36ca095d5 100644 --- a/src/util/macros.h +++ b/src/util/macros.h @@ -138,7 +138,7 @@ do { \ /* Forced function inlining */ #ifndef ALWAYS_INLINE -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) #define ALWAYS_INLINE inline __attribute__((always_inline)) # elif defined(_MSC_VER) #define ALWAYS_INLINE __forceinline ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Haiku: convert to autotools
Quoting kallisti5 (2018-02-05 14:14:42) > On 2018-02-05 15:39, Dylan Baker wrote: > > Quoting kallisti5 (2018-02-05 12:58:30) > >> On 2017-10-24 11:47, Emil Velikov wrote: > >> > Hi Jerome, > >> > > >> > On 23 October 2017 at 16:58, Jerome Duval> >> > wrote: > >> >> * configure.ac: > >> >> -pthread is not available on Haiku. > >> >> Haiku doesn't require --enable-dri > >> >> build hgl on Haiku > >> >> * egl/Makefile.am: define backendfiles for Haiku > >> >> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and > >> >> targets/haiku-softpipe on Haiku. > >> >> * src/gallium/targets/haiku-softpipe: add Makefile.am > >> >> * src/gallium/state_trackers/hgl: add Makefile.am > >> >> * winsys/sw/hgl: add Makefile.am > >> >> * src/hgl/Makefile.am: add Makefile.am > >> >> --- > >> > Thanks for the patch. I think Eric has a point regarding splitting this > >> > up. > >> > Here is one way to handle it: > >> > - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku > >> > - 2 - src/egl > >> > - 3 - src/hgl > >> > - 4 misc fixes (the SoftwareRenderer.cpp hunk?) > >> > - 5 toggle - configure.ac + src/Makefile.am > >> > >> Hm, it looks like Jerome never got back to work on these changes... > >> let > >> me try to > >> pick up the ball and run with it. > >> > >> > Couple of small suggestions: > >> > - keep all the sources and headers in the sources lists in > >> > Makefile.sources > >> > - how do you guys manage pthreads - please mention that in the commit > >> > message. > >> > > >> > If I'm reading this correctly, you strip out -pthread and there's no > >> > pthread-stubs on Haiku. > >> > >> Haiku (and BeOS for that matter) has pthread support built into its > >> core > >> libroot.so. > >> > >> No need for -lpthread, all applications can assume its presence. > >> Things > >> that link -lpthread actually fail due to a non-existant libpthread... 
> >> *however* as i'm typing this i'm being told we recently implemented a > >> dummy static libpthread.a to try and appease assumptions about > >> -lpthread > >> existence so i'll remove the pthread checks :-) > >> > >> -- Alex > > > > Hi Alex, > > > > I have a branch for building haiku with meson, when I was trying to > > compile > > neither the scons build nor the autotools build seemed to compile on a > > Haiku VM > > instance (x86_64), that was a few months ago though, so maybe its > > fixed. > > > > Our plan is to remove autotools from mesa, probably this year. I'm > > thinking if > > things look pretty good through the 18.0 release cycle I'll probably > > propose > > marking autotools as deprecated for 18.1 and propose removal in 18.2. > > Ah. crap. I just got autoconfig working :-). Historically I have only > used > SCons for our builds. I always preferred the SCons build since > autotools always > ends up looking like spaghetti. Here is what our current build does: Sorry, I've meant to get on this a little faster, but the meson conversion has been a lot more time consuming than I predicted it would be, lol. > > https://github.com/haikuports/haikuports/blob/master/sys-libs/mesa/mesa-17.1.4.recipe#L52 Meson should make that a little simpler, since it has an install target, and we should be able to make that work for you guys as well. > > It looks like Jerome hacked in a patch for autotools... but i've heard > some reports > of instability with the resulting artifacts. > > > I'm not going to block you guys using autotools or NAK anything, I just > > want > > you to be aware that we're trying to consolidate down to just meson and > > android.mk files. I can respin the haiku patches and CC you if you're > > interested in > > looking at them. > > If Meson is the future, i'm definitely down helping (or even taking > over) that branch > if it is just incomplete Haiku work. I think it's pretty close to being ready for review. 
It's based on a branch to fix static-glapi with meson that never landed, but I think I have that all sorted now so I'm going to push that today, and that should make the haiku build pretty simple (it's just one patch after that I think). > > I'm going to try and do better maintenance on Haiku Mesa in 2018. I've > been only around > minimally in 2017 am a little out of date. > > > You might also want to see if you guys can update your meson, at least > > last time > > I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that > > -pthread > > and -lpthread are never added by meson. > > I just installed meson on Haiku and we are currently at 0.43.0 > Awesome, that makes things a lot easier with meson. You can have a look at the build in general, I think meson is syntactically pretty nice, it's like a minimal python or ruby, and the builds are much faster (that's just because ninja is really smart). Dylan signature.asc Description: signature ___ mesa-dev mailing list
Re: [Mesa-dev] [PATCH] Haiku: convert to autotools
On 2018-02-05 16:14, kallisti5 wrote: On 2018-02-05 15:39, Dylan Baker wrote: Quoting kallisti5 (2018-02-05 12:58:30) On 2017-10-24 11:47, Emil Velikov wrote: > Hi Jerome, > > On 23 October 2017 at 16:58, Jerome Duval> wrote: >> * configure.ac: >> -pthread is not available on Haiku. >> Haiku doesn't require --enable-dri >> build hgl on Haiku >> * egl/Makefile.am: define backendfiles for Haiku >> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and >> targets/haiku-softpipe on Haiku. >> * src/gallium/targets/haiku-softpipe: add Makefile.am >> * src/gallium/state_trackers/hgl: add Makefile.am >> * winsys/sw/hgl: add Makefile.am >> * src/hgl/Makefile.am: add Makefile.am >> --- > Thanks for the patch. I think Eric has a point regarding splitting this > up. > Here is one way to handle it: > - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku > - 2 - src/egl > - 3 - src/hgl > - 4 misc fixes (the SoftwareRenderer.cpp hunk?) > - 5 toggle - configure.ac + src/Makefile.am Hm, it looks like Jerome never got back to work on these changes... let me try to pick up the ball and run with it. > Couple of small suggestions: > - keep all the sources and headers in the sources lists in > Makefile.sources > - how do you guys manage pthreads - please mention that in the commit > message. > > If I'm reading this correctly, you strip out -pthread and there's no > pthread-stubs on Haiku. Haiku (and BeOS for that matter) has pthread support built into its core libroot.so. No need for -lpthread, all applications can assume its presence. Things that link -lpthread actually fail due to a non-existant libpthread... 
*however* as i'm typing this i'm being told we recently implemented a dummy static libpthread.a to try and appease assumptions about -lpthread existence so i'll remove the pthread checks :-) -- Alex Hi Alex, I have a branch for building haiku with meson, when I was trying to compile neither the scons build nor the autotools build seemed to compile on a Haiku VM instance (x86_64), that was a few months ago though, so maybe its fixed. Our plan is to remove autotools from mesa, probably this year. I'm thinking if things look pretty good through the 18.0 release cycle I'll probably propose marking autotools as deprecated for 18.1 and propose removal in 18.2. Ah. crap. I just got autoconfig working :-). Historically I have only used SCons for our builds. I always preferred the SCons build since autotools always ends up looking like spaghetti. Here is what our current build does: https://github.com/haikuports/haikuports/blob/master/sys-libs/mesa/mesa-17.1.4.recipe#L52 It looks like Jerome hacked in a patch for autotools... but i've heard some reports of instability with the resulting artifacts. I'm not going to block you guys using autotools or NAK anything, I just want you to be aware that we're trying to consolidate down to just meson and android.mk files. I can respin the haiku patches and CC you if you're interested in looking at them. If Meson is the future, i'm definitely down helping (or even taking over) that branch if it is just incomplete Haiku work. I'm going to try and do better maintenance on Haiku Mesa in 2018. I've been only around minimally in 2017 am a little out of date. You might also want to see if you guys can update your meson, at least last time I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that -pthread and -lpthread are never added by meson. I just installed meson on Haiku and we are currently at 0.43.0 I took a quick crack at meson on Haiku. Getting stuck with a -pthread getting injected somewhere around glapi gen. 
Lets see if my modifications are close to yours sight unseen :-) https://gist.github.com/kallisti5/eb43162dd4c9e61b5740444d20955118 -- Alex ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] r600/fp64: Fix build.
This is turning our CI red, so I'm going to go ahead and push this. Quoting Vinson Lee (2018-02-05 15:24:45) > CC r600_shader.lo > r600_shader.c: In function ‘egcm_int_to_double’: > r600_shader.c:4543:12: error: ‘ctx’ is a pointer; did you mean to use ‘->’? > if (ctx.bc->chip_class == CAYMAN) > ^ > -> > > Fixes: 35b430157776 ("r600/fp64: fix integer->double conversion") > Signed-off-by: Vinson Lee> --- > src/gallium/drivers/r600/r600_shader.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index e3b832b04f77..4874d14a581d 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -4540,7 +4540,7 @@ static int egcm_int_to_double(struct r600_shader_ctx > *ctx) > alu.dst.sel = temp_reg; > alu.dst.chan = i; > alu.dst.write = 1; > - if (ctx.bc->chip_class == CAYMAN) > + if (ctx->bc->chip_class == CAYMAN) > alu.last = i == dchan + 1; > else > alu.last = 1; /* trans only ops on > evergreen */ > -- > 2.14.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] r600/fp64: Fix build.
I just wrote the same patch: Reviewed-by: Dylan Baker Quoting Vinson Lee (2018-02-05 15:24:45) > CC r600_shader.lo > r600_shader.c: In function ‘egcm_int_to_double’: > r600_shader.c:4543:12: error: ‘ctx’ is a pointer; did you mean to use ‘->’? > if (ctx.bc->chip_class == CAYMAN) > ^ > -> > > Fixes: 35b430157776 ("r600/fp64: fix integer->double conversion") > Signed-off-by: Vinson Lee > --- > src/gallium/drivers/r600/r600_shader.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index e3b832b04f77..4874d14a581d 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -4540,7 +4540,7 @@ static int egcm_int_to_double(struct r600_shader_ctx > *ctx) > alu.dst.sel = temp_reg; > alu.dst.chan = i; > alu.dst.write = 1; > - if (ctx.bc->chip_class == CAYMAN) > + if (ctx->bc->chip_class == CAYMAN) > alu.last = i == dchan + 1; > else > alu.last = 1; /* trans only ops on > evergreen */ > -- > 2.14.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir: lower fexp2(fmul(flog2(a), 2)) to fmul(a, a)
Do you have any data from shader-db for this change (and the other patch)? On 02/05/2018 06:08 AM, Samuel Pitoiset wrote: > Similar for the 4 case. > > Suggested by Bas. > > Signed-off-by: Samuel Pitoiset> --- > src/compiler/nir/nir_opt_algebraic.py | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/src/compiler/nir/nir_opt_algebraic.py > b/src/compiler/nir/nir_opt_algebraic.py > index 6dc19d9b12..b30d1df199 100644 > --- a/src/compiler/nir/nir_opt_algebraic.py > +++ b/src/compiler/nir/nir_opt_algebraic.py > @@ -321,6 +321,8 @@ optimizations = [ > (('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), > '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b > (('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), > d))), > ('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # > 2^(lg2(a) * b + lg2(c) + d) = a^b * c^d > + (('~fexp2', ('fmul', ('flog2', a), 2.0)), ('fmul', a, a)), > + (('~fexp2', ('fmul', ('flog2', a), 4.0)), ('fmul', ('fmul', a, a), > ('fmul', a, a))), > (('~fpow', a, 1.0), a), > (('~fpow', a, 2.0), ('fmul', a, a)), > (('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] r600/fp64: Fix build.
CC r600_shader.lo r600_shader.c: In function ‘egcm_int_to_double’: r600_shader.c:4543:12: error: ‘ctx’ is a pointer; did you mean to use ‘->’? if (ctx.bc->chip_class == CAYMAN) ^ -> Fixes: 35b430157776 ("r600/fp64: fix integer->double conversion") Signed-off-by: Vinson Lee--- src/gallium/drivers/r600/r600_shader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index e3b832b04f77..4874d14a581d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -4540,7 +4540,7 @@ static int egcm_int_to_double(struct r600_shader_ctx *ctx) alu.dst.sel = temp_reg; alu.dst.chan = i; alu.dst.write = 1; - if (ctx.bc->chip_class == CAYMAN) + if (ctx->bc->chip_class == CAYMAN) alu.last = i == dchan + 1; else alu.last = 1; /* trans only ops on evergreen */ -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/3] st/glsl_to_tgsi: move nir detection earlier - bisected
On 05/02/18 15:04, Dieter Nützel wrote: Am 02.02.2018 10:24, schrieb Timothy Arceri: On 02/02/18 19:26, Dieter Nützel wrote: Hello Tim, _this_ version breaks UH, UV, mpv, blender 2.79 (some test files, not all). Must be something with the cache file(s). The cache currently needs to be deleted when switching between nir and tgsi. I'm not sure if I should try to avoid this or not ... I guess it will probably save some bug reports so I'll try to send a follow up patch. Hi Tim, it is NOT your fault. I tracked it down to Marek's commit be973ed21f6e456ebd753f26a99151d9ea6e765c This should fix things for now: https://patchwork.freedesktop.org/patch/202759/ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeonsi/nir: always set input_usage_mask as using all components
This fixes a regression for now, in the future we should gather the used components properly. Fixes: be973ed21f6e "radeonsi: load the right number of components for VS inputs and TBOs" Cc: Marek Olšák--- src/gallium/drivers/radeonsi/si_shader_nir.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 8abffdb8fc..d2ea09706d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -304,6 +304,11 @@ void si_nir_scan_shader(const struct nir_shader *nir, unsigned attrib_count = glsl_count_attribute_slots(type, nir->info.stage == MESA_SHADER_VERTEX); + i = variable->data.driver_location; + + /* TODO: gather the actual input useage and remove this. */ + info->input_usage_mask[i] = TGSI_WRITEMASK_XYZW; + /* Vertex shader inputs don't have semantics. The state * tracker has already mapped them to attributes via * variable->data.driver_location. @@ -327,8 +332,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, continue; } - i = variable->data.driver_location; - for (unsigned j = 0; j < attrib_count; j++, i++) { if (processed_inputs & ((uint64_t)1 << i)) -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] anv/image: Support CCS_E for images which may be used for storage
We have to do resolves whenever we go into the general layout for these images. However, it also means that images which declare the storage usage but don't actually need it most of the time will still get compression. --- src/intel/vulkan/anv_image.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 011e952..38f1c47 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -459,8 +459,7 @@ make_surface(const struct anv_device *dev, * a render target. This means that it's safe to just leave * compression on at all times for these formats. */ -if (!(vk_info->usage & VK_IMAGE_USAGE_STORAGE_BIT) && -all_formats_ccs_e_compatible(>info, vk_info)) { +if (all_formats_ccs_e_compatible(>info, vk_info)) { image->planes[plane].aux_usage = ISL_AUX_USAGE_CCS_E; } } @@ -795,9 +794,22 @@ anv_layout_to_aux_usage(const struct gen_device_info * const devinfo, return ISL_AUX_USAGE_NONE; + case VK_IMAGE_LAYOUT_GENERAL: + if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) { + return ISL_AUX_USAGE_NONE; + } else if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + /* If we might be used as a storage image and we're in the general + * layout, we have to disable aux because the dataport doesn't + * support CCS. + */ + return ISL_AUX_USAGE_NONE; + } else { + return image->planes[plane].aux_usage; + } + + /* Transfer Layouts */ - case VK_IMAGE_LAYOUT_GENERAL: case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) { -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] anv/cmd_buffer: Simplify transition_depth_buffer
If we don't have HiZ, then anv_layout_to_aux_usage will return NONE for both layouts. If the two layouts are the same, they will get the aux usage. In either case, the code below will give us ISL_AUX_OP_NONE and we'll return without doing anything. --- src/intel/vulkan/genX_cmd_buffer.c | 12 1 file changed, 12 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 819bd36..a7950cf 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -419,18 +419,6 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer, VkImageLayout initial_layout, VkImageLayout final_layout) { - assert(image); - - /* A transition is a no-op if HiZ is not enabled, or if the initial and -* final layouts are equal. -* -* The undefined layout indicates that the user doesn't care about the data -* that's currently in the buffer. Therefore, a data-preserving resolve -* operation is not needed. -*/ - if (image->planes[0].aux_usage != ISL_AUX_USAGE_HIZ || initial_layout == final_layout) - return; - const bool hiz_enabled = ISL_AUX_USAGE_HIZ == anv_layout_to_aux_usage(_buffer->device->info, image, VK_IMAGE_ASPECT_DEPTH_BIT, initial_layout); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] anv/cmd_buffer: Use layout_to_* helpers in compute_aux_usage
--- src/intel/vulkan/genX_cmd_buffer.c | 53 +- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index a7950cf..056528f 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -241,16 +241,27 @@ color_attachment_compute_aux_usage(struct anv_device * device, att_state->input_aux_usage = ISL_AUX_USAGE_NONE; att_state->fast_clear = false; return; - } else if (iview->image->planes[0].aux_usage == ISL_AUX_USAGE_MCS) { - att_state->aux_usage = ISL_AUX_USAGE_MCS; + } + + att_state->aux_usage = + anv_layout_to_aux_usage(>info, iview->image, + VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + /* If we don't have aux, then we should have returned early in the layer +* check above. If we got here, we must have something. +*/ + assert(att_state->aux_usage != ISL_AUX_USAGE_NONE); + + if (att_state->aux_usage == ISL_AUX_USAGE_MCS) { att_state->input_aux_usage = ISL_AUX_USAGE_MCS; att_state->fast_clear = false; return; - } else if (iview->image->planes[0].aux_usage == ISL_AUX_USAGE_CCS_E) { - att_state->aux_usage = ISL_AUX_USAGE_CCS_E; + } + + if (att_state->aux_usage == ISL_AUX_USAGE_CCS_E) { att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E; } else { - att_state->aux_usage = ISL_AUX_USAGE_CCS_D; /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode: * *"If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D @@ -292,8 +303,25 @@ color_attachment_compute_aux_usage(struct anv_device * device, att_state->clear_value.color.uint32[3] == 0; if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { - /* Start off assuming fast clears are possible */ - att_state->fast_clear = true; + /* Start by getting the fast clear type. We use the first subpass + * layout here because we don't want to fast-clear if the first subpass + * to use the attachment can't handle fast-clears. 
+ */ + enum anv_fast_clear_type fast_clear_type = + anv_layout_to_fast_clear_type(>info, iview->image, + VK_IMAGE_ASPECT_COLOR_BIT, + cmd_state->pass->attachments[att].first_subpass_layout); + switch (fast_clear_type) { + case ANV_FAST_CLEAR_NONE: + att_state->fast_clear = false; + break; + case ANV_FAST_CLEAR_DEFAULT_VALUE: + att_state->fast_clear = att_state->clear_color_is_zero; + break; + case ANV_FAST_CLEAR_ANY: + att_state->fast_clear = true; + break; + } /* Potentially, we could do partial fast-clears but doing so has crazy * alignment restrictions. It's easier to just restrict to full size @@ -309,17 +337,6 @@ color_attachment_compute_aux_usage(struct anv_device * device, if (GEN_GEN <= 8 && !att_state->clear_color_is_zero_one) att_state->fast_clear = false; - /* We only allow fast clears in the GENERAL layout if the auxiliary - * buffer is always enabled and the fast-clear value is all 0's. See - * add_fast_clear_state_buffer() for more information. - */ - if (cmd_state->pass->attachments[att].first_subpass_layout == - VK_IMAGE_LAYOUT_GENERAL && - (!att_state->clear_color_is_zero || - iview->image->planes[0].aux_usage == ISL_AUX_USAGE_NONE)) { - att_state->fast_clear = false; - } - /* We only allow fast clears to the first slice of an image (level 0, * layer 0) and only for the entire slice. This guarantees us that, at * any given time, there is only one clear color on any given image at -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] anv/blorp: Use layout_to_aux_usage when a layout is provided
Instead of having aux usage and ANV_AUX_USAGE_DEFAULT to mean "give me something reasonable" we now use anv_layout_to_aux_usage whenever a layout is available. If a layout is available, we ignore the aux_usage parameter. For the cases where we have an explicit aux usage such as clears and aux ops, we have a new ANV_IMAGE_LAYOUT_EXPLICIT_AUX layout. --- src/intel/vulkan/anv_blorp.c | 71 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 4018476..1cef587 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -173,7 +173,10 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device, assert(ok); } -#define ANV_AUX_USAGE_DEFAULT ((enum isl_aux_usage)0xff) +/* Pick something high enough that it won't be used in core and low enough it + * will never map to an extension. + */ +#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)1000 static struct blorp_address anv_to_blorp_address(struct anv_address addr) @@ -188,18 +191,14 @@ static void get_blorp_surf_for_anv_image(const struct anv_device *device, const struct anv_image *image, VkImageAspectFlags aspect, + VkImageLayout layout, enum isl_aux_usage aux_usage, struct blorp_surf *blorp_surf) { uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); - if (aux_usage == ANV_AUX_USAGE_DEFAULT) { - aux_usage = image->planes[plane].aux_usage; - - /* Blorp copies and blits can't handle HiZ so disable it by default */ - if (aux_usage == ISL_AUX_USAGE_HIZ) - aux_usage = ISL_AUX_USAGE_NONE; - } + if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) + aux_usage = anv_layout_to_aux_usage(>info, image, aspect, layout); const struct anv_surface *surface = >planes[plane].surface; *blorp_surf = (struct blorp_surf) { @@ -279,10 +278,12 @@ void anv_CmdCopyImage( struct blorp_surf src_surf, dst_surf; get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, 1UL << aspect_bit, - ANV_AUX_USAGE_DEFAULT, _surf); + srcImageLayout, 
ISL_AUX_USAGE_NONE, + _surf); get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, 1UL << aspect_bit, - ANV_AUX_USAGE_DEFAULT, _surf); + dstImageLayout, ISL_AUX_USAGE_NONE, + _surf); anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, 1UL << aspect_bit, dst_surf.aux_usage, dst_level, @@ -299,9 +300,11 @@ void anv_CmdCopyImage( } else { struct blorp_surf src_surf, dst_surf; get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask, - ANV_AUX_USAGE_DEFAULT, _surf); + srcImageLayout, ISL_AUX_USAGE_NONE, + _surf); get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask, - ANV_AUX_USAGE_DEFAULT, _surf); + dstImageLayout, ISL_AUX_USAGE_NONE, + _surf); anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask, dst_surf.aux_usage, dst_level, dst_base_layer, layer_count); @@ -323,6 +326,7 @@ static void copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, struct anv_buffer *anv_buffer, struct anv_image *anv_image, + VkImageLayout image_layout, uint32_t regionCount, const VkBufferImageCopy* pRegions, bool buffer_to_image) @@ -351,7 +355,8 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect, - ANV_AUX_USAGE_DEFAULT, ); + image_layout, ISL_AUX_USAGE_NONE, + ); image.offset = anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset); image.level = pRegions[r].imageSubresource.mipLevel; @@ -426,7 +431,7 @@ void anv_CmdCopyBufferToImage( ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); ANV_FROM_HANDLE(anv_image,
[Mesa-dev] [PATCH 0/5] anv: Support CCS_E for images which may be used for storage
This little series adds support for enabling CCS_E for images which may have VK_IMAGE_USAGE_STORAGE_BIT set. Previously, we just bailed on these images and disabled CCS. However, so long as we do a full resolve when entering VK_IMAGE_LAYOUT_GENERAL, we can support CCS_E for all the other layouts just fine. The primary motivation of this series is actually to get better test coverage of our resolve code. By doing resolves when going into VK_IMAGE_LAYOUT_GENERAL, the full resolve paths now get tested on many more image types. I have no idea what the perf impact of this will be. The first 4 patches just make us use layout_to_* more often. Jason Ekstrand (5): anv/cmd_buffer: Simplify transition_depth_buffer anv/cmd_buffer: Use layout_to_* helpers in compute_aux_usage anv/cmd_buffer: Delete some assert-only variables anv/blorp: Use layout_to_aux_usage when a layout is provided anv/image: Support CCS_E for images which may be used for storage src/intel/vulkan/anv_blorp.c | 71 -- src/intel/vulkan/anv_image.c | 18 -- src/intel/vulkan/genX_cmd_buffer.c | 70 +++-- 3 files changed, 97 insertions(+), 62 deletions(-) -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] anv/cmd_buffer: Delete some assert-only variables
Checking the sample count is almost as good as aux usage in this case. --- src/intel/vulkan/genX_cmd_buffer.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 056528f..afe577c 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -732,9 +732,6 @@ init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer, set_image_fast_clear_state(cmd_buffer, image, aspect, ANV_FAST_CLEAR_NONE); - uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); - enum isl_aux_usage aux_usage = image->planes[plane].aux_usage; - /* The fast clear value dword(s) will be copied into a surface state object. * Ensure that the restrictions of the fields in the dword(s) are followed. * @@ -755,7 +752,7 @@ init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer, if (GEN_GEN >= 9) { /* MCS buffers on SKL+ can only have 1/0 clear colors. */ -assert(aux_usage == ISL_AUX_USAGE_MCS); +assert(image->samples > 1); sdi.ImmediateData = 0; } else if (GEN_VERSIONx10 >= 75) { /* Pre-SKL, the dword containing the clear values also contains -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/14] anv/cmd_buffer: Avoid unnecessary transitions before fast clears
Previously, we would always apply the layout transition at the beginning of the subpass and then do the clear whether fast or slow. This meant that there were some cases, specifically when the initial layout is VK_IMAGE_LAYOUT_UNDEFINED, where we would end up doing a fast-clear or ambiguate followed immediately by a fast-clear. This probably isn't terribly expensive, but it is a waste that we can avoid easily enough now that we're doing everything at the same time in begin_subpass. --- src/intel/vulkan/genX_cmd_buffer.c | 57 ++ 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 2732ef3..819bd36 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3326,39 +3326,25 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, target_layout = subpass->attachments[i].layout; } - if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { - assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); - transition_color_buffer(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, - iview->planes[0].isl.base_level, 1, - iview->planes[0].isl.base_array_layer, - iview->planes[0].isl.array_len, - att_state->current_layout, target_layout); - } else if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { - transition_depth_buffer(cmd_buffer, image, - att_state->current_layout, target_layout); - att_state->aux_usage = -anv_layout_to_aux_usage(_buffer->device->info, image, -VK_IMAGE_ASPECT_DEPTH_BIT, target_layout); - } - att_state->current_layout = target_layout; + uint32_t base_layer = iview->planes[0].isl.base_array_layer; + uint32_t layer_count = fb->layers; - if (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) { - assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT); - - /* Multi-planar images are not supported as attachments */ + if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { assert(image->aspects == 
VK_IMAGE_ASPECT_COLOR_BIT); - assert(image->n_planes == 1); - - uint32_t base_layer = iview->planes[0].isl.base_array_layer; - uint32_t layer_count = fb->layers; - if (att_state->fast_clear) { + if ((att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) && + att_state->fast_clear) { /* We only support fast-clears on the first layer */ assert(iview->planes[0].isl.base_level == 0); assert(iview->planes[0].isl.base_array_layer == 0); anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1, ISL_AUX_OP_FAST_CLEAR, false); + +/* Performing a fast clear takes care of all our transition needs + * for the first slice. Increment the base layer and layer count + * so that later transitions and clears don't touch layer 0. + */ base_layer++; layer_count--; @@ -3383,6 +3369,29 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, } if (layer_count > 0) { +transition_color_buffer(cmd_buffer, image, +VK_IMAGE_ASPECT_COLOR_BIT, +iview->planes[0].isl.base_level, 1, +base_layer, layer_count, +att_state->current_layout, target_layout); + } + } else if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + transition_depth_buffer(cmd_buffer, image, + att_state->current_layout, target_layout); + att_state->aux_usage = +anv_layout_to_aux_usage(_buffer->device->info, image, +VK_IMAGE_ASPECT_DEPTH_BIT, target_layout); + } + att_state->current_layout = target_layout; + + if (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT); + + /* Multi-planar images are not supported as attachments */ + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); + assert(image->n_planes == 1); + + if (layer_count > 0) { assert(image->n_planes == 1); anv_image_clear_color(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, att_state->aux_usage, -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 13/14] anv/cmd_buffer: Do subpass image transitions in begin/end_subpass
--- src/intel/vulkan/genX_cmd_buffer.c | 190 + 1 file changed, 68 insertions(+), 122 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 2d17c28..2732ef3 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3257,120 +3257,6 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.hiz_enabled = info.hiz_usage == ISL_AUX_USAGE_HIZ; } - -/** - * @brief Perform any layout transitions required at the beginning and/or end - *of the current subpass for depth buffers. - * - * TODO: Consider preprocessing the attachment reference array at render pass - * create time to determine if no layout transition is needed at the - * beginning and/or end of each subpass. - * - * @param cmd_buffer The command buffer the transition is happening within. - * @param subpass_end If true, marks that the transition is happening at the - *end of the subpass. - */ -static void -cmd_buffer_subpass_transition_layouts(struct anv_cmd_buffer * const cmd_buffer, - const bool subpass_end) -{ - /* We need a non-NULL command buffer. */ - assert(cmd_buffer); - - const struct anv_cmd_state * const cmd_state = _buffer->state; - const struct anv_subpass * const subpass = cmd_state->subpass; - - /* This function must be called within a subpass. */ - assert(subpass); - - /* If there are attachment references, the array shouldn't be NULL. -*/ - if (subpass->attachment_count > 0) - assert(subpass->attachments); - - /* Iterate over the array of attachment references. */ - for (const struct anv_subpass_attachment *att_ref = subpass->attachments; -att_ref < subpass->attachments + subpass->attachment_count; att_ref++) { - - /* If the attachment is unused, we can't perform a layout transition. */ - if (att_ref->attachment == VK_ATTACHMENT_UNUSED) - continue; - - /* This attachment index shouldn't go out of bounds. 
*/ - assert(att_ref->attachment < cmd_state->pass->attachment_count); - - const struct anv_render_pass_attachment * const att_desc = - _state->pass->attachments[att_ref->attachment]; - struct anv_attachment_state * const att_state = - _buffer->state.attachments[att_ref->attachment]; - - /* The attachment should not be used in a subpass after its last. */ - assert(att_desc->last_subpass_idx >= anv_get_subpass_id(cmd_state)); - - if (subpass_end && anv_get_subpass_id(cmd_state) < - att_desc->last_subpass_idx) { - /* We're calling this function on a buffer twice in one subpass and - * this is not the last use of the buffer. The layout should not have - * changed from the first call and no transition is necessary. - */ - assert(att_state->current_layout == att_ref->layout || -att_state->current_layout == -VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - continue; - } - - /* The attachment index must be less than the number of attachments - * within the framebuffer. - */ - assert(att_ref->attachment < cmd_state->framebuffer->attachment_count); - - const struct anv_image_view * const iview = - cmd_state->framebuffer->attachments[att_ref->attachment]; - const struct anv_image * const image = iview->image; - - /* Get the appropriate target layout for this attachment. */ - VkImageLayout target_layout; - - /* A resolve is necessary before use as an input attachment if the clear - * color or auxiliary buffer usage isn't supported by the sampler. - */ - const bool input_needs_resolve = -(att_state->fast_clear && !att_state->clear_color_is_zero_one) || -att_state->input_aux_usage != att_state->aux_usage; - if (subpass_end) { - target_layout = att_desc->final_layout; - } else if (iview->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV && - !input_needs_resolve) { - /* Layout transitions before the final only help to enable sampling as - * an input attachment. 
If the input attachment supports sampling - * using the auxiliary surface, we can skip such transitions by making - * the target layout one that is CCS-aware. - */ - target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - } else { - target_layout = att_ref->layout; - } - - /* Perform the layout transition. */ - if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { - transition_depth_buffer(cmd_buffer, image, - att_state->current_layout, target_layout); - att_state->aux_usage = -anv_layout_to_aux_usage(_buffer->device->info, image, -VK_IMAGE_ASPECT_DEPTH_BIT,
[Mesa-dev] [PATCH 01/14] anv/cmd_buffer: Apply subpass flushes before set_subpass
This seems slightly more correct because it means that the flushes happen before any clears or resolves implied by the subpass transition. --- src/intel/vulkan/genX_cmd_buffer.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 7c86d70..c234300 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3502,10 +3502,10 @@ void genX(CmdBeginRenderPass)( genX(flush_pipeline_select_3d)(cmd_buffer); - genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); - cmd_buffer->state.pending_pipe_bits |= cmd_buffer->state.pass->subpass_flushes[0]; + + genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); } void genX(CmdNextSubpass)( @@ -3525,11 +3525,11 @@ void genX(CmdNextSubpass)( */ cmd_buffer_subpass_transition_layouts(cmd_buffer, true); - genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); - uint32_t subpass_id = anv_get_subpass_id(&cmd_buffer->state); cmd_buffer->state.pending_pipe_bits |= cmd_buffer->state.pass->subpass_flushes[subpass_id]; + + genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); } void genX(CmdEndRenderPass)( -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/14] anv/cmd_buffer: Move the color portion of clear_subpass into begin_subpass
This doesn't really change much now but it will give us more/better control over clears in the future. The one interesting functional change here is that we are now re-emitting 3DSTATE_DEPTH_BUFFERS and friends for each clear. However, this only happens at begin_subpass time so it shouldn't be substantially more expensive. --- src/intel/vulkan/anv_blorp.c | 124 ++--- src/intel/vulkan/anv_private.h | 8 +++ src/intel/vulkan/genX_cmd_buffer.c | 54 +++- 3 files changed, 94 insertions(+), 92 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index d38b343..fd32227 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1142,17 +1142,6 @@ subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) const struct anv_cmd_state *cmd_state = _buffer->state; uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment; - for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { - uint32_t a = cmd_state->subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - assert(a < cmd_state->pass->attachment_count); - if (cmd_state->attachments[a].pending_clear_aspects) { - return true; - } - } - if (ds != VK_ATTACHMENT_UNUSED) { assert(ds < cmd_state->pass->attachment_count); if (cmd_state->attachments[ds].pending_clear_aspects) @@ -1186,86 +1175,6 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) }; struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { - const uint32_t a = cmd_state->subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - assert(a < cmd_state->pass->attachment_count); - struct anv_attachment_state *att_state = _state->attachments[a]; - - if (!att_state->pending_clear_aspects) - continue; - - assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT); - - struct anv_image_view *iview = fb->attachments[a]; - const struct 
anv_image *image = iview->image; - struct blorp_surf surf; - get_blorp_surf_for_anv_image(cmd_buffer->device, - image, VK_IMAGE_ASPECT_COLOR_BIT, - att_state->aux_usage, ); - - uint32_t base_layer = iview->planes[0].isl.base_array_layer; - uint32_t layer_count = fb->layers; - - if (att_state->fast_clear) { - surf.clear_color = vk_to_isl_color(att_state->clear_value.color); - - /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": - * - *"After Render target fast clear, pipe-control with color cache - *write-flush must be issued before sending any DRAW commands on - *that render target." - * - * This comment is a bit cryptic and doesn't really tell you what's - * going or what's really needed. It appears that fast clear ops are - * not properly synchronized with other drawing. This means that we - * cannot have a fast clear operation in the pipe at the same time as - * other regular drawing operations. We need to use a PIPE_CONTROL - * to ensure that the contents of the previous draw hit the render - * target before we resolve and then use a second PIPE_CONTROL after - * the resolve to ensure that it is completed before any additional - * drawing occurs. 
- */ - cmd_buffer->state.pending_pipe_bits |= -ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; - - /* We only support fast-clears on the first layer */ - assert(iview->planes[0].isl.base_level == 0); - assert(iview->planes[0].isl.base_array_layer == 0); - - assert(image->n_planes == 1); - blorp_fast_clear(, , iview->planes[0].isl.format, 0, 0, 1, - render_area.offset.x, render_area.offset.y, - render_area.offset.x + render_area.extent.width, - render_area.offset.y + render_area.extent.height); - base_layer++; - layer_count--; - - cmd_buffer->state.pending_pipe_bits |= -ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; - } - - if (layer_count > 0) { - assert(image->n_planes == 1); - anv_cmd_buffer_mark_image_written(cmd_buffer, image, - VK_IMAGE_ASPECT_COLOR_BIT, - att_state->aux_usage, - iview->planes[0].isl.base_level, - base_layer, layer_count); - - blorp_clear(, , iview->planes[0].isl.format, -
[Mesa-dev] [PATCH 07/14] anv/cmd_buffer: Decide whether or not to HiZ clear up-front
This moves the decision out of begin_subpass and into BeginRenderPass like the decision for color clears. We use a similar name for the function for depth/stencil as for color even though no aux usage is really getting computed. --- src/intel/vulkan/genX_cmd_buffer.c | 84 +++--- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 21fdc6b..ab79fbf 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -350,6 +350,52 @@ color_attachment_compute_aux_usage(struct anv_device * device, } } +static void +depth_stencil_attachment_compute_aux_usage(struct anv_device *device, + struct anv_cmd_state *cmd_state, + uint32_t att, VkRect2D render_area) +{ + struct anv_attachment_state *att_state = _state->attachments[att]; + struct anv_image_view *iview = cmd_state->framebuffer->attachments[att]; + + /* These will be initialized after the first subpass transition. */ + att_state->aux_usage = ISL_AUX_USAGE_NONE; + att_state->input_aux_usage = ISL_AUX_USAGE_NONE; + + if (att_state->aux_usage != ISL_AUX_USAGE_HIZ) { + att_state->fast_clear = false; + return; + } else if (!(att_state->pending_clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { + /* If we're just clearing stencil, we can always HiZ clear */ + att_state->fast_clear = true; + return; + } + + if (!blorp_can_hiz_clear_depth(GEN_GEN, + iview->planes[0].isl.format, + iview->image->samples, + render_area.offset.x, + render_area.offset.y, + render_area.offset.x + + render_area.extent.width, + render_area.offset.y + + render_area.extent.height)) { + att_state->fast_clear = false; + } else if (att_state->clear_value.depthStencil.depth != ANV_HZ_FC_VAL) { + att_state->fast_clear = false; + } else if (GEN_GEN == 8 && + anv_can_sample_with_hiz(>info, iview->image)) { + /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a + * fast-cleared portion of a HiZ buffer. 
Testing has revealed that Gen8 + * only supports returning 0.0f. Gens prior to gen8 do not support this + * feature at all. + */ + att_state->fast_clear = false; + } else { + att_state->fast_clear = true; + } +} + static bool need_input_attachment_state(const struct anv_render_pass_attachment *att) { @@ -1125,12 +1171,9 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer, add_image_view_relocs(cmd_buffer, iview, 0, state->attachments[i].color); } else { -/* This field will be initialized after the first subpass - * transition. - */ -state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE; - -state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE; +depth_stencil_attachment_compute_aux_usage(cmd_buffer->device, + state, i, + begin->renderArea); } if (need_input_attachment_state(>attachments[i])) { @@ -3541,34 +3584,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT)); if (att_state->pending_clear_aspects) { - bool clear_with_hiz = att_state->aux_usage == ISL_AUX_USAGE_HIZ; - if (clear_with_hiz && - (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { -if (!blorp_can_hiz_clear_depth(GEN_GEN, - iview->planes[0].isl.format, - iview->image->samples, - render_area.offset.x, - render_area.offset.y, - render_area.offset.x + - render_area.extent.width, - render_area.offset.y + - render_area.extent.height)) { - clear_with_hiz = false; -} else if (att_state->clear_value.depthStencil.depth != ANV_HZ_FC_VAL) { - clear_with_hiz = false; -} else if (GEN_GEN == 8 && - anv_can_sample_with_hiz(_buffer->device->info, - iview->image)) { - /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a -* fast-cleared
[Mesa-dev] [PATCH 02/14] anv/cmd_buffer: Add begin/end_subpass helpers
Having begin/end_subpass is a bit nicer than the begin/next/end hooks that Vulkan gives us. --- src/intel/vulkan/genX_cmd_buffer.c | 55 +- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index c234300..3e37eaf 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3432,10 +3432,11 @@ cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer) static void -genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) +cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) { cmd_buffer->state.subpass = subpass; + uint32_t subpass_id = anv_get_subpass_id(_buffer->state); cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; @@ -3460,6 +3461,10 @@ genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, */ cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE; + /* Accumulate any subpass flushes that need to happen before the subpass */ + cmd_buffer->state.pending_pipe_bits |= + cmd_buffer->state.pass->subpass_flushes[subpass_id]; + /* Perform transitions to the subpass layout before any writes have * occurred. */ @@ -3479,6 +3484,26 @@ genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_clear_subpass(cmd_buffer); } +static void +cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t subpass_id = anv_get_subpass_id(_buffer->state); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); + + /* Perform transitions to the final layout after all writes have occurred. +*/ + cmd_buffer_subpass_transition_layouts(cmd_buffer, true); + + /* Accumulate any subpass flushes that need to happen after the subpass. +* Yes, they do get accumulated twice in the NextSubpass case but since +* genX_CmdNextSubpass just calls end/begin back-to-back, we just end up +* ORing the bits in twice so it's harmless. 
+*/ + cmd_buffer->state.pending_pipe_bits |= + cmd_buffer->state.pass->subpass_flushes[subpass_id + 1]; +} + void genX(CmdBeginRenderPass)( VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo*pRenderPassBegin, @@ -3502,10 +3527,7 @@ void genX(CmdBeginRenderPass)( genX(flush_pipeline_select_3d)(cmd_buffer); - cmd_buffer->state.pending_pipe_bits |= - cmd_buffer->state.pass->subpass_flushes[0]; - - genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); + cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); } void genX(CmdNextSubpass)( @@ -3519,17 +3541,9 @@ void genX(CmdNextSubpass)( assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - anv_cmd_buffer_resolve_subpass(cmd_buffer); - - /* Perform transitions to the final layout after all writes have occurred. -*/ - cmd_buffer_subpass_transition_layouts(cmd_buffer, true); - - uint32_t subpass_id = anv_get_subpass_id(_buffer->state); - cmd_buffer->state.pending_pipe_bits |= - cmd_buffer->state.pass->subpass_flushes[subpass_id]; + cmd_buffer_end_subpass(cmd_buffer); - genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); + cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); } void genX(CmdEndRenderPass)( @@ -3540,14 +3554,7 @@ void genX(CmdEndRenderPass)( if (anv_batch_has_error(_buffer->batch)) return; - anv_cmd_buffer_resolve_subpass(cmd_buffer); - - /* Perform transitions to the final layout after all writes have occurred. -*/ - cmd_buffer_subpass_transition_layouts(cmd_buffer, true); - - cmd_buffer->state.pending_pipe_bits |= - cmd_buffer->state.pass->subpass_flushes[cmd_buffer->state.pass->subpass_count]; + cmd_buffer_end_subpass(cmd_buffer); cmd_buffer->state.hiz_enabled = false; -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/14] anv/cmd_buffer: Mark depth/stencil surfaces written in begin_subpass
--- src/intel/vulkan/genX_cmd_buffer.c | 50 ++ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 4eee85a..2d17c28 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3255,27 +3255,6 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) isl_emit_depth_stencil_hiz_s(>isl_dev, dw, ); cmd_buffer->state.hiz_enabled = info.hiz_usage == ISL_AUX_USAGE_HIZ; - - /* We may be writing depth or stencil so we need to mark the surface. -* Unfortunately, there's no way to know at this point whether the depth or -* stencil tests used will actually write to the surface. -*/ - if (image && (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { - genX(cmd_buffer_mark_image_written)(cmd_buffer, image, - VK_IMAGE_ASPECT_DEPTH_BIT, - info.hiz_usage, - info.view->base_level, - info.view->base_array_layer, - info.view->array_len); - } - if (image && (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) { - genX(cmd_buffer_mark_image_written)(cmd_buffer, image, - VK_IMAGE_ASPECT_STENCIL_BIT, - ISL_AUX_USAGE_NONE, - info.view->base_level, - info.view->base_array_layer, - info.view->array_len); - } } @@ -3550,6 +3529,35 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, iview->planes[0].isl.base_level, iview->planes[0].isl.base_array_layer, fb->layers); + } else if (subpass->attachments[i].usage == + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + /* We may be writing depth or stencil so we need to mark the surface. + * Unfortunately, there's no way to know at this point whether the + * depth or stencil tests used will actually write to the surface. + * + * Even though stencil may be plane 1, it always shares a base_level + * with depth. 
+ */ + const struct isl_view *ds_view = >planes[0].isl; + if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { +genX(cmd_buffer_mark_image_written)(cmd_buffer, image, +VK_IMAGE_ASPECT_DEPTH_BIT, +att_state->aux_usage, +ds_view->base_level, +ds_view->base_array_layer, +fb->layers); + } + if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { +/* Even though stencil may be plane 1, it always shares a + * base_level with depth. + */ +genX(cmd_buffer_mark_image_written)(cmd_buffer, image, +VK_IMAGE_ASPECT_STENCIL_BIT, +ISL_AUX_USAGE_NONE, +ds_view->base_level, +ds_view->base_array_layer, +fb->layers); + } } att_state->pending_clear_aspects = 0; -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/14] anv/cmd_buffer: Pass a subpass id into begin_subpass
This is a bit less awkward than passing in the subpass because it means we don't have to extract the subpass id from the subpass. --- src/intel/vulkan/genX_cmd_buffer.c | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 3e37eaf..519d14f 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3430,13 +3430,11 @@ cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer) } } - static void cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) + uint32_t subpass_id) { - cmd_buffer->state.subpass = subpass; - uint32_t subpass_id = anv_get_subpass_id(&cmd_buffer->state); + cmd_buffer->state.subpass = &cmd_buffer->state.pass->subpasses[subpass_id]; cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; @@ -3527,7 +3525,7 @@ void genX(CmdBeginRenderPass)( genX(flush_pipeline_select_3d)(cmd_buffer); - cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); + cmd_buffer_begin_subpass(cmd_buffer, 0); } void genX(CmdNextSubpass)( @@ -3541,9 +3539,9 @@ void genX(CmdNextSubpass)( assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + uint32_t prev_subpass = anv_get_subpass_id(&cmd_buffer->state); cmd_buffer_end_subpass(cmd_buffer); - - cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); + cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1); } void genX(CmdEndRenderPass)( -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/14] anv/cmd_buffer: Sync clear values in begin_subpass
This is quite a bit cleaner because we now sync the clear values at the same time as we do the fast clear. For loading the clear values into the surface state, we now do it once when we handle the LOAD_OP_LOAD instead of every subpass. --- src/intel/vulkan/genX_cmd_buffer.c | 148 - 1 file changed, 48 insertions(+), 100 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index f92e86f..4eee85a 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3392,97 +3392,6 @@ cmd_buffer_subpass_transition_layouts(struct anv_cmd_buffer * const cmd_buffer, } } -/* Update the clear value dword(s) in surface state objects or the fast clear - * state buffer entry for the color attachments used in this subpass. - */ -static void -cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer) -{ - assert(cmd_buffer && cmd_buffer->state.subpass); - - const struct anv_cmd_state *state = _buffer->state; - - /* Iterate through every color attachment used in this subpass. */ - for (uint32_t i = 0; i < state->subpass->color_count; ++i) { - - /* The attachment should be one of the attachments described in the - * render pass and used in the subpass. - */ - const uint32_t a = state->subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - assert(a < state->pass->attachment_count); - - /* Store some information regarding this attachment. */ - const struct anv_attachment_state *att_state = >attachments[a]; - const struct anv_image_view *iview = state->framebuffer->attachments[a]; - const struct anv_render_pass_attachment *rp_att = - >pass->attachments[a]; - - if (att_state->aux_usage == ISL_AUX_USAGE_NONE) - continue; - - /* The fast clear state entry must be updated if a fast clear is going to - * happen. The surface state must be updated if the clear value from a - * prior fast clear may be needed. 
- */ - if (att_state->pending_clear_aspects && att_state->fast_clear) { - /* Update the fast clear state entry. */ - genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state, - iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - true /* copy from ss */); - - /* Fast-clears impact whether or not a resolve will be necessary. */ - if (att_state->clear_color_is_zero) { -/* This image always has the auxiliary buffer enabled. We can mark - * the subresource as not needing a resolve because the clear color - * will match what's in every RENDER_SURFACE_STATE object when it's - * being used for sampling. - */ -set_image_fast_clear_state(cmd_buffer, iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - ANV_FAST_CLEAR_DEFAULT_VALUE); - } else { -set_image_fast_clear_state(cmd_buffer, iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - ANV_FAST_CLEAR_ANY); - } - } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD && - iview->planes[0].isl.base_level == 0 && - iview->planes[0].isl.base_array_layer == 0) { - /* The attachment may have been fast-cleared in a previous render - * pass and the value is needed now. Update the surface state(s). - * - * TODO: Do this only once per render pass instead of every subpass. - */ - genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state, - iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - false /* copy to ss */); - - if (need_input_attachment_state(rp_att) && - att_state->input_aux_usage != ISL_AUX_USAGE_NONE) { -genX(copy_fast_clear_dwords)(cmd_buffer, att_state->input.state, - iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - false /* copy to ss */); - } - } - - /* We assume that if we're starting a subpass, we're going to do some - * rendering so we may end up with compressed data. - */ - genX(cmd_buffer_mark_image_written)(cmd_buffer, iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - att_state->aux_usage, - iview->planes[0].isl.base_level, - iview->planes[0].isl.base_array_layer, -
[Mesa-dev] [PATCH 05/14] intel/blorp: Add a blorp_hiz_clear_depth_stencil helper
This is similar to blorp_gen8_hiz_clear_attachments except that it takes actual images instead of trusting in the already set depth state. --- src/intel/blorp/blorp.h | 11 ++ src/intel/blorp/blorp_clear.c | 50 +++ 2 files changed, 61 insertions(+) diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h index ce3762c..4626f2f 100644 --- a/src/intel/blorp/blorp.h +++ b/src/intel/blorp/blorp.h @@ -170,6 +170,17 @@ blorp_can_hiz_clear_depth(uint8_t gen, enum isl_format format, uint32_t num_samples, uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1); +void +blorp_hiz_clear_depth_stencil(struct blorp_batch *batch, + const struct blorp_surf *depth, + const struct blorp_surf *stencil, + uint32_t level, + uint32_t start_layer, uint32_t num_layers, + uint32_t x0, uint32_t y0, + uint32_t x1, uint32_t y1, + bool clear_depth, float depth_value, + bool clear_stencil, uint8_t stencil_value); + void blorp_gen8_hiz_clear_attachments(struct blorp_batch *batch, diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c index 32ec31b..ccbbda0 100644 --- a/src/intel/blorp/blorp_clear.c +++ b/src/intel/blorp/blorp_clear.c @@ -612,6 +612,56 @@ blorp_can_hiz_clear_depth(uint8_t gen, enum isl_format format, return true; } +void +blorp_hiz_clear_depth_stencil(struct blorp_batch *batch, + const struct blorp_surf *depth, + const struct blorp_surf *stencil, + uint32_t level, + uint32_t start_layer, uint32_t num_layers, + uint32_t x0, uint32_t y0, + uint32_t x1, uint32_t y1, + bool clear_depth, float depth_value, + bool clear_stencil, uint8_t stencil_value) +{ + struct blorp_params params; + blorp_params_init(); + + /* This requires WM_HZ_OP which only exists on gen8+ */ + assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 8); + + params.hiz_op = ISL_AUX_OP_FAST_CLEAR; + params.num_layers = 1; + + params.x0 = x0; + params.y0 = y0; + params.x1 = x1; + params.y1 = y1; + + for (uint32_t l = 0; l < num_layers; l++) { + const uint32_t layer = start_layer + l; + if (clear_stencil) { 
+ brw_blorp_surface_info_init(batch->blorp, , stencil, + level, layer, + ISL_FORMAT_UNSUPPORTED, true); + params.stencil_mask = 0xff; + params.stencil_ref = stencil_value; + params.num_samples = params.stencil.surf.samples; + } + + if (clear_depth) { + brw_blorp_surface_info_init(batch->blorp, , depth, + level, layer, + ISL_FORMAT_UNSUPPORTED, true); + params.depth.clear_color.f32[0] = depth_value; + params.depth_format = +isl_format_get_depth_format(depth->surf->format, false); + params.num_samples = params.depth.surf.samples; + } + + batch->blorp->exec(batch, ); + } +} + /* Given a depth stencil attachment, this function performs a fast depth clear * on a depth portion and a regular clear on the stencil portion. When * performing a fast depth clear on the depth portion, the HiZ buffer is simply -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/14] anv/cmd_buffer: Add a concept of pending load aspects
These are the same as pending clear aspects only for the "load" operation. --- src/intel/vulkan/anv_private.h | 1 + src/intel/vulkan/genX_cmd_buffer.c | 22 -- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 906c6f3..d424498 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1678,6 +1678,7 @@ struct anv_attachment_state { VkImageLayoutcurrent_layout; VkImageAspectFlags pending_clear_aspects; + VkImageAspectFlags pending_load_aspects; bool fast_clear; VkClearValue clear_value; bool clear_color_is_zero_one; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 608f5ee..2590ea3 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1123,26 +1123,36 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass_attachment *att = >attachments[i]; VkImageAspectFlags att_aspects = vk_format_aspects(att->format); VkImageAspectFlags clear_aspects = 0; + VkImageAspectFlags load_aspects = 0; if (att_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { /* color attachment */ if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; +} else if (att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) { + load_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; } } else { /* depthstencil attachment */ -if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && -att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; +if (att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + } else if (att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) { + load_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + } } -if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && -att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; +if 
(att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } else if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) { + load_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } } } state->attachments[i].current_layout = att->initial_layout; state->attachments[i].pending_clear_aspects = clear_aspects; + state->attachments[i].pending_load_aspects = load_aspects; if (clear_aspects) state->attachments[i].clear_value = begin->pClearValues[i]; -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/14] anv/pass: Store usage in each subpass attachment
This requires us to ditch the VkAttachmentReference struct in favor of an anv-specific struct. However, we can now easily identify from just the subpass attachment what kind of an attachment it is. This will make iteration over anv_subpass::attachments a little easier in some cases. --- src/intel/vulkan/anv_pass.c| 35 +++ src/intel/vulkan/anv_private.h | 16 +++- src/intel/vulkan/genX_cmd_buffer.c | 2 +- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c index a77e52b..5b8b138 100644 --- a/src/intel/vulkan/anv_pass.c +++ b/src/intel/vulkan/anv_pass.c @@ -65,7 +65,7 @@ VkResult anv_CreateRenderPass( anv_multialloc_add(&ma, &attachments, pCreateInfo->attachmentCount); anv_multialloc_add(&ma, &subpass_flushes, pCreateInfo->subpassCount + 1); - VkAttachmentReference *subpass_attachments; + struct anv_subpass_attachment *subpass_attachments; uint32_t subpass_attachment_count = 0; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { subpass_attachment_count += @@ -117,7 +117,11 @@ VkResult anv_CreateRenderPass( for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { uint32_t a = desc->pInputAttachments[j].attachment; -subpass->input_attachments[j] = desc->pInputAttachments[j]; +subpass->input_attachments[j] = (struct anv_subpass_attachment) { + .usage = VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, + .attachment = desc->pInputAttachments[j].attachment, + .layout = desc->pInputAttachments[j].layout, +}; if (a != VK_ATTACHMENT_UNUSED) { has_input = true; pass->attachments[a].usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; @@ -138,7 +142,11 @@ VkResult anv_CreateRenderPass( for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { uint32_t a = desc->pColorAttachments[j].attachment; -subpass->color_attachments[j] = desc->pColorAttachments[j]; +subpass->color_attachments[j] = (struct anv_subpass_attachment) { + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .attachment = desc->pColorAttachments[j].attachment, + .layout = 
desc->pColorAttachments[j].layout, +}; if (a != VK_ATTACHMENT_UNUSED) { has_color = true; pass->attachments[a].usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -157,7 +165,11 @@ VkResult anv_CreateRenderPass( for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { uint32_t a = desc->pResolveAttachments[j].attachment; -subpass->resolve_attachments[j] = desc->pResolveAttachments[j]; +subpass->resolve_attachments[j] = (struct anv_subpass_attachment) { + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, + .attachment = desc->pResolveAttachments[j].attachment, + .layout = desc->pResolveAttachments[j].layout, +}; if (a != VK_ATTACHMENT_UNUSED) { subpass->has_resolve = true; uint32_t color_att = desc->pColorAttachments[j].attachment; @@ -174,8 +186,12 @@ VkResult anv_CreateRenderPass( if (desc->pDepthStencilAttachment) { uint32_t a = desc->pDepthStencilAttachment->attachment; - *subpass_attachments++ = subpass->depth_stencil_attachment = -*desc->pDepthStencilAttachment; + subpass->depth_stencil_attachment = (struct anv_subpass_attachment) { +.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, +.attachment = desc->pDepthStencilAttachment->attachment, +.layout = desc->pDepthStencilAttachment->layout, + }; + *subpass_attachments++ = subpass->depth_stencil_attachment; if (a != VK_ATTACHMENT_UNUSED) { has_depth = true; pass->attachments[a].usage |= @@ -186,8 +202,11 @@ VkResult anv_CreateRenderPass( *desc->pDepthStencilAttachment); } } else { - subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; - subpass->depth_stencil_attachment.layout = VK_IMAGE_LAYOUT_UNDEFINED; + subpass->depth_stencil_attachment = (struct anv_subpass_attachment) { +.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, +.attachment = VK_ATTACHMENT_UNUSED, +.layout = VK_IMAGE_LAYOUT_UNDEFINED, + }; } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index d424498..9a8da2b 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2865,6 
+2865,12 @@ struct anv_framebuffer { struct anv_image_view * attachments[0]; }; +struct anv_subpass_attachment { +
[Mesa-dev] [PATCH 08/14] anv/cmd_buffer: Iterate all subpass attachments when clearing
This unifies things a bit because we now handle depth and stencil at the same time. It also ensures that clears happen for input attachments. --- src/intel/vulkan/genX_cmd_buffer.c | 77 -- 1 file changed, 32 insertions(+), 45 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index ab79fbf..608f5ee 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3524,66 +3524,51 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, VkRect2D render_area = cmd_buffer->state.render_area; struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - for (uint32_t i = 0; i < subpass->color_count; ++i) { - const uint32_t a = subpass->color_attachments[i].attachment; + + for (uint32_t i = 0; i < subpass->attachment_count; ++i) { + const uint32_t a = subpass->attachments[i].attachment; if (a == VK_ATTACHMENT_UNUSED) continue; assert(a < cmd_state->pass->attachment_count); struct anv_attachment_state *att_state = _state->attachments[a]; - if (!att_state->pending_clear_aspects) - continue; - - assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT); - struct anv_image_view *iview = fb->attachments[a]; const struct anv_image *image = iview->image; - /* Multi-planar images are not supported as attachments */ - assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); - assert(image->n_planes == 1); - - uint32_t base_layer = iview->planes[0].isl.base_array_layer; - uint32_t layer_count = fb->layers; + if (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT); - if (att_state->fast_clear) { - /* We only support fast-clears on the first layer */ - assert(iview->planes[0].isl.base_level == 0); - assert(iview->planes[0].isl.base_array_layer == 0); - - anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, - 0, 0, 1, ISL_AUX_OP_FAST_CLEAR, false); - base_layer++; - layer_count--; - } - - if (layer_count 
> 0) { + /* Multi-planar images are not supported as attachments */ + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); assert(image->n_planes == 1); - anv_image_clear_color(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, - att_state->aux_usage, - iview->planes[0].isl.format, - iview->planes[0].isl.swizzle, - iview->planes[0].isl.base_level, - base_layer, layer_count, render_area, - vk_to_isl_color(att_state->clear_value.color)); - } - - att_state->pending_clear_aspects = 0; - } - if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { - const uint32_t a = subpass->depth_stencil_attachment.attachment; + uint32_t base_layer = iview->planes[0].isl.base_array_layer; + uint32_t layer_count = fb->layers; - assert(a < cmd_state->pass->attachment_count); - struct anv_attachment_state *att_state = _state->attachments[a]; - struct anv_image_view *iview = fb->attachments[a]; - const struct anv_image *image = iview->image; + if (att_state->fast_clear) { +/* We only support fast-clears on the first layer */ +assert(iview->planes[0].isl.base_level == 0); +assert(iview->planes[0].isl.base_array_layer == 0); - assert(image->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)); +anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, + 0, 0, 1, ISL_AUX_OP_FAST_CLEAR, false); +base_layer++; +layer_count--; + } - if (att_state->pending_clear_aspects) { + if (layer_count > 0) { +assert(image->n_planes == 1); +anv_image_clear_color(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, + att_state->aux_usage, + iview->planes[0].isl.format, + iview->planes[0].isl.swizzle, + iview->planes[0].isl.base_level, + base_layer, layer_count, render_area, + vk_to_isl_color(att_state->clear_value.color)); + } + } else if (att_state->pending_clear_aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)) { if (att_state->fast_clear) { /* We currently only support HiZ for single-layer images */
[Mesa-dev] [PATCH 06/14] anv/cmd_buffer: Move the rest of clear_subpass into begin_subpass
--- src/intel/vulkan/anv_blorp.c | 243 - src/intel/vulkan/anv_private.h | 17 ++- src/intel/vulkan/genX_cmd_buffer.c | 68 ++- 3 files changed, 188 insertions(+), 140 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index fd32227..4018476 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1136,143 +1136,6 @@ enum subpass_stage { SUBPASS_STAGE_RESOLVE, }; -static bool -subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) -{ - const struct anv_cmd_state *cmd_state = _buffer->state; - uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment; - - if (ds != VK_ATTACHMENT_UNUSED) { - assert(ds < cmd_state->pass->attachment_count); - if (cmd_state->attachments[ds].pending_clear_aspects) - return true; - } - - return false; -} - -void -anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) -{ - const struct anv_cmd_state *cmd_state = _buffer->state; - const VkRect2D render_area = cmd_buffer->state.render_area; - - - if (!subpass_needs_clear(cmd_buffer)) - return; - - /* Because this gets called within a render pass, we tell blorp not to -* trash our depth and stencil buffers. 
-*/ - struct blorp_batch batch; - blorp_batch_init(_buffer->device->blorp, , cmd_buffer, -BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); - - VkClearRect clear_rect = { - .rect = cmd_buffer->state.render_area, - .baseArrayLayer = 0, - .layerCount = cmd_buffer->state.framebuffer->layers, - }; - - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - - const uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment; - assert(ds == VK_ATTACHMENT_UNUSED || ds < cmd_state->pass->attachment_count); - - if (ds != VK_ATTACHMENT_UNUSED && - cmd_state->attachments[ds].pending_clear_aspects) { - - VkClearAttachment clear_att = { - .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, - .clearValue = cmd_state->attachments[ds].clear_value, - }; - - - const uint8_t gen = cmd_buffer->device->info.gen; - bool clear_with_hiz = gen >= 8 && cmd_state->attachments[ds].aux_usage == -ISL_AUX_USAGE_HIZ; - const struct anv_image_view *iview = fb->attachments[ds]; - - if (clear_with_hiz) { - const bool clear_depth = clear_att.aspectMask & - VK_IMAGE_ASPECT_DEPTH_BIT; - const bool clear_stencil = clear_att.aspectMask & -VK_IMAGE_ASPECT_STENCIL_BIT; - - /* Check against restrictions for depth buffer clearing. A great GPU - * performance benefit isn't expected when using the HZ sequence for - * stencil-only clears. Therefore, we don't emit a HZ op sequence for - * a stencil clear in addition to using the BLORP-fallback for depth. - */ - if (clear_depth) { -if (!blorp_can_hiz_clear_depth(gen, iview->planes[0].isl.format, - iview->image->samples, - render_area.offset.x, - render_area.offset.y, - render_area.offset.x + - render_area.extent.width, - render_area.offset.y + - render_area.extent.height)) { - clear_with_hiz = false; -} else if (clear_att.clearValue.depthStencil.depth != - ANV_HZ_FC_VAL) { - /* Don't enable fast depth clears for any color not equal to -* ANV_HZ_FC_VAL. 
-*/ - clear_with_hiz = false; -} else if (gen == 8 && - anv_can_sample_with_hiz(_buffer->device->info, - iview->image)) { - /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a -* fast-cleared portion of a HiZ buffer. Testing has revealed -* that Gen8 only supports returning 0.0f. Gens prior to gen8 do -* not support this feature at all. -*/ - clear_with_hiz = false; -} - } - - if (clear_with_hiz) { -blorp_gen8_hiz_clear_attachments(, iview->image->samples, - render_area.offset.x, - render_area.offset.y, - render_area.offset.x + - render_area.extent.width, - render_area.offset.y + - render_area.extent.height, -
[Mesa-dev] [PATCH 00/14] anv: Rework subpass resolves and clears
This little series just shuffles code around to make things a bit more clear. (At least I think it does!) The basic idea is to split set_subpass into begin_subpass and end_subpass and then move all of the resolve and clear code into those two helpers. This means that we no longer have 3 or 4 different functions that all loop over attachments which we have to call in just the right order. The last patch in this series takes advantage of this new structure by allowing us to avoid the transition from UNDEFINED when LOAD_OP_CLEAR is specified. I highly doubt the performance impact of this will be noticeable but it's nice to be able to do. Jason Ekstrand (14): anv/cmd_buffer: Apply subpass flushes before set_subpass anv/cmd_buffer: Add begin/end_subpass helpers anv/cmd_buffer: Pass a subpass id into begin_subpass anv/cmd_buffer: Move the color portion of clear_subpass into begin_subpass intel/blorp: Add a blorp_hiz_clear_depth_stencil helper anv/cmd_buffer: Move the rest of clear_subpass into begin_subpass anv/cmd_buffer: Decide whether or not to HiZ clear up-front anv/cmd_buffer: Iterate all subpass attachments when clearing anv/cmd_buffer: Add a concept of pending load aspects anv/pass: Store usage in each subpass attachment anv/cmd_buffer: Sync clear values in begin_subpass anv/cmd_buffer: Mark depth/stencil surfaces written in begin_subpass anv/cmd_buffer: Do subpass image transitions in begin/end_subpass anv/cmd_buffer: Avoid unnecessary transitions before fast clears src/intel/blorp/blorp.h| 11 + src/intel/blorp/blorp_clear.c | 50 src/intel/vulkan/anv_blorp.c | 367 ++-- src/intel/vulkan/anv_pass.c| 35 ++- src/intel/vulkan/anv_private.h | 42 ++- src/intel/vulkan/genX_cmd_buffer.c | 555 - 6 files changed, 563 insertions(+), 497 deletions(-) -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: rename gl_vertex_array_object::_VertexAttrib -> _VertexArray
Hi Brian, On Monday, 5 February 2018 18:23:17 CET Brian Paul wrote: > Since the type is gl_vertex_array. Update comment to explain that > these arrays are only used by the VBO module. > > Also rename some local variables in _mesa_update_vao_derived_arrays(). Makes sense to me and the change looks good. Reviewed-by: Mathias Fröhlichbest Mathias > --- > src/mesa/main/arrayobj.c | 13 ++--- > src/mesa/main/attrib.c| 2 +- > src/mesa/main/mtypes.h| 4 ++-- > src/mesa/vbo/vbo_exec_array.c | 2 +- > 4 files changed, 10 insertions(+), 11 deletions(-) > > diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c > index 360d097..a6fa33c 100644 > --- a/src/mesa/main/arrayobj.c > +++ b/src/mesa/main/arrayobj.c > @@ -283,8 +283,8 @@ unbind_array_object_vbos(struct gl_context *ctx, struct gl_vertex_array_object * > for (i = 0; i < ARRAY_SIZE(obj->BufferBinding); i++) >_mesa_reference_buffer_object(ctx, >BufferBinding[i].BufferObj, NULL); > > - for (i = 0; i < ARRAY_SIZE(obj->_VertexAttrib); i++) > - _mesa_reference_buffer_object(ctx, >_VertexAttrib[i].BufferObj, NULL); > + for (i = 0; i < ARRAY_SIZE(obj->_VertexArray); i++) > + _mesa_reference_buffer_object(ctx, >_VertexArray[i].BufferObj, NULL); > } > > > @@ -453,14 +453,13 @@ _mesa_update_vao_derived_arrays(struct gl_context *ctx, > > while (arrays) { >const int attrib = u_bit_scan(); > - struct gl_vertex_array *client_array = >_VertexAttrib[attrib]; > - const struct gl_array_attributes *attrib_array = > + struct gl_vertex_array *array = >_VertexArray[attrib]; > + const struct gl_array_attributes *attribs = > >VertexAttrib[attrib]; >const struct gl_vertex_buffer_binding *buffer_binding = > - >BufferBinding[attrib_array->BufferBindingIndex]; > + >BufferBinding[attribs->BufferBindingIndex]; > > - _mesa_update_vertex_array(ctx, client_array, attrib_array, > -buffer_binding); > + _mesa_update_vertex_array(ctx, array, attribs, buffer_binding); > } > } > > diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c > index 
a9e4a11..8ac5db0 100644 > --- a/src/mesa/main/attrib.c > +++ b/src/mesa/main/attrib.c > @@ -1503,7 +1503,7 @@ copy_array_object(struct gl_context *ctx, > /* skip RefCount */ > > for (i = 0; i < ARRAY_SIZE(src->VertexAttrib); i++) { > - _mesa_copy_vertex_array(ctx, >_VertexAttrib[i], >_VertexAttrib[i]); > + _mesa_copy_vertex_array(ctx, >_VertexArray[i], >_VertexArray[i]); >_mesa_copy_vertex_attrib_array(ctx, >VertexAttrib[i], >VertexAttrib[i]); >_mesa_copy_vertex_buffer_binding(ctx, >BufferBinding[i], >BufferBinding[i]); > } > diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h > index 3a67d43..aa083c3 100644 > --- a/src/mesa/main/mtypes.h > +++ b/src/mesa/main/mtypes.h > @@ -1603,9 +1603,9 @@ struct gl_vertex_array_object > * Derived vertex attribute arrays > * > * This is a legacy data structure created from gl_array_attributes and > -* gl_vertex_buffer_binding, for compatibility with existing driver code. > +* gl_vertex_buffer_binding, only used by the VBO module at this time. > */ > - struct gl_vertex_array _VertexAttrib[VERT_ATTRIB_MAX]; > + struct gl_vertex_array _VertexArray[VERT_ATTRIB_MAX]; > > /** Vertex attribute arrays */ > struct gl_array_attributes VertexAttrib[VERT_ATTRIB_MAX]; > diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c > index 42759d5..a5bedc8 100644 > --- a/src/mesa/vbo/vbo_exec_array.c > +++ b/src/mesa/vbo/vbo_exec_array.c > @@ -314,7 +314,7 @@ recalculate_input_bindings(struct gl_context *ctx) > struct vbo_context *vbo = vbo_context(ctx); > struct vbo_exec_context *exec = >exec; > const struct gl_vertex_array_object *vao = ctx->Array.VAO; > - const struct gl_vertex_array *vertexAttrib = vao->_VertexAttrib; > + const struct gl_vertex_array *vertexAttrib = vao->_VertexArray; > const struct gl_vertex_array **inputs = >array.inputs[0]; > > /* May shuffle the position and generic0 bits around */ > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] mesa: Mute arrays for Bitmap/CopyPixels/DrawPixels callbacks.
From: Mathias Fröhlich Set the _DrawArray pointer to NULL when calling into the driver's Bitmap/CopyPixels/DrawPixels hooks. This fixes an assert that gets uncovered when the following patch gets applied. Signed-off-by: Mathias Fröhlich --- src/mesa/main/drawpix.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/mesa/main/drawpix.c b/src/mesa/main/drawpix.c index ec1d2618ca..05a18d3e51 100644 --- a/src/mesa/main/drawpix.c +++ b/src/mesa/main/drawpix.c @@ -37,6 +37,7 @@ #include "dispatch.h" #include "glformats.h" #include "fbobject.h" +#include "varray.h" /* @@ -72,6 +73,9 @@ _mesa_DrawPixels( GLsizei width, GLsizei height, */ _mesa_set_vp_override(ctx, GL_TRUE); + /* Prevent drivers from accessing stale draw array data */ + _mesa_set_drawing_arrays(ctx, NULL); + /* Note: this call does state validation */ if (!_mesa_valid_to_render(ctx, "glDrawPixels")) { goto end; /* the error code was recorded */ @@ -228,6 +232,9 @@ _mesa_CopyPixels( GLint srcx, GLint srcy, GLsizei width, GLsizei height, */ _mesa_set_vp_override(ctx, GL_TRUE); + /* Prevent drivers from accessing stale draw array data */ + _mesa_set_drawing_arrays(ctx, NULL); + /* Note: this call does state validation */ if (!_mesa_valid_to_render(ctx, "glCopyPixels")) { goto end; /* the error code was recorded */ @@ -320,6 +327,9 @@ _mesa_Bitmap( GLsizei width, GLsizei height, if (ctx->RasterDiscard) return; + /* Prevent drivers from accessing stale draw array data */ + _mesa_set_drawing_arrays(ctx, NULL); + if (ctx->RenderMode == GL_RENDER) { /* Truncate, to satisfy conformance tests (matches SGI's OpenGL). */ if (width > 0 && height > 0) { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] mesa: Fix VAO buffer object tracking.
From: Mathias Fröhlich When changing the attribute binding in the VAO, we also need to account for getting rid of non-VBO bits from VertexAttribBufferMask. Signed-off-by: Mathias Fröhlich Reviewed-by: Brian Paul --- src/mesa/main/varray.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 81b8fbe8ca..2fd9de630f 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -142,6 +142,8 @@ vertex_attrib_binding(struct gl_context *ctx, if (_mesa_is_bufferobj(vao->BufferBinding[bindingIndex].BufferObj)) vao->VertexAttribBufferMask |= array_bit; + else + vao->VertexAttribBufferMask &= ~array_bit; FLUSH_VERTICES(ctx, _NEW_ARRAY); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] mesa: Only update enabled VAO gl_vertex_array entries.
From: Mathias Fröhlich Instead of updating all modified gl_vertex_array_object::_VertexArray entries, just update those that are modified and enabled. Also release buffer objects from the _VertexArray entries that belong to disabled attributes. v2: Also set Ptr and Size to zero. Signed-off-by: Mathias Fröhlich Reviewed-by: Brian Paul --- src/mesa/main/varray.c | 8 src/mesa/main/varray.h | 29 ++--- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 2fd9de630f..a2d1d74798 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -152,7 +152,7 @@ vertex_attrib_binding(struct gl_context *ctx, array->BufferBindingIndex = bindingIndex; - vao->NewArrays |= array_bit; + vao->NewArrays |= vao->_Enabled & array_bit; } } @@ -187,7 +187,7 @@ _mesa_bind_vertex_buffer(struct gl_context *ctx, else vao->VertexAttribBufferMask |= binding->_BoundArrays; - vao->NewArrays |= binding->_BoundArrays; + vao->NewArrays |= vao->_Enabled & binding->_BoundArrays; } } @@ -208,7 +208,7 @@ vertex_binding_divisor(struct gl_context *ctx, if (binding->InstanceDivisor != divisor) { FLUSH_VERTICES(ctx, _NEW_ARRAY); binding->InstanceDivisor = divisor; - vao->NewArrays |= binding->_BoundArrays; + vao->NewArrays |= vao->_Enabled & binding->_BoundArrays; } } @@ -318,7 +318,7 @@ _mesa_update_array_format(struct gl_context *ctx, array->RelativeOffset = relativeOffset; array->_ElementSize = elementSize; - vao->NewArrays |= VERT_BIT(attrib); + vao->NewArrays |= vao->_Enabled & VERT_BIT(attrib); ctx->NewState |= _NEW_ARRAY; } diff --git a/src/mesa/main/varray.h b/src/mesa/main/varray.h index fe7eb81631..ede7a004e4 100644 --- a/src/mesa/main/varray.h +++ b/src/mesa/main/varray.h @@ -58,17 +58,24 @@ _mesa_update_vertex_array(struct gl_context *ctx, const struct gl_array_attributes *attribs, const struct gl_vertex_buffer_binding *binding) { - dst->Size = attribs->Size; - dst->Type = attribs->Type; - dst->Format = attribs->Format; - dst->StrideB =
binding->Stride; - dst->Ptr = _mesa_vertex_attrib_address(attribs, binding); - dst->Normalized = attribs->Normalized; - dst->Integer = attribs->Integer; - dst->Doubles = attribs->Doubles; - dst->InstanceDivisor = binding->InstanceDivisor; - dst->_ElementSize = attribs->_ElementSize; - _mesa_reference_buffer_object(ctx, &dst->BufferObj, binding->BufferObj); + if (attribs->Enabled) { + dst->Size = attribs->Size; + dst->Type = attribs->Type; + dst->Format = attribs->Format; + dst->StrideB = binding->Stride; + dst->Ptr = _mesa_vertex_attrib_address(attribs, binding); + dst->Normalized = attribs->Normalized; + dst->Integer = attribs->Integer; + dst->Doubles = attribs->Doubles; + dst->InstanceDivisor = binding->InstanceDivisor; + dst->_ElementSize = attribs->_ElementSize; + _mesa_reference_buffer_object(ctx, &dst->BufferObj, binding->BufferObj); + } else { + /* Disabled arrays shall not be consumed */ + dst->Size = 0; + dst->Ptr = NULL; + _mesa_reference_buffer_object(ctx, &dst->BufferObj, NULL); + } } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/3] Fix and tweak to the VAO v2
From: Mathias Fröhlich Hi Brian, Actually, after incorporating your review requests to set gl_vertex_array::Size and gl_vertex_array::Ptr to zero, radeonsi started to assert in Bitmap/CopyPixels/DrawPixels. So, here is the updated series including the requested changes, and additionally, for review, the change to fix the mentioned asserts in several piglit tests. Please review! best Mathias Mathias Fröhlich (3): mesa: Fix VAO buffer object tracking. mesa: Mute arrays for Bitmap/CopyPixels/DrawPixels callbacks. mesa: Only update enabled VAO gl_vertex_array entries. src/mesa/main/drawpix.c | 10 ++ src/mesa/main/varray.c | 10 ++ src/mesa/main/varray.h | 29 ++--- 3 files changed, 34 insertions(+), 15 deletions(-) -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Haiku: convert to autotools
On 2018-02-05 15:39, Dylan Baker wrote: Quoting kallisti5 (2018-02-05 12:58:30) On 2017-10-24 11:47, Emil Velikov wrote: > Hi Jerome, > > On 23 October 2017 at 16:58, Jerome Duval> wrote: >> * configure.ac: >> -pthread is not available on Haiku. >> Haiku doesn't require --enable-dri >> build hgl on Haiku >> * egl/Makefile.am: define backendfiles for Haiku >> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and >> targets/haiku-softpipe on Haiku. >> * src/gallium/targets/haiku-softpipe: add Makefile.am >> * src/gallium/state_trackers/hgl: add Makefile.am >> * winsys/sw/hgl: add Makefile.am >> * src/hgl/Makefile.am: add Makefile.am >> --- > Thanks for the patch. I think Eric has a point regarding splitting this > up. > Here is one way to handle it: > - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku > - 2 - src/egl > - 3 - src/hgl > - 4 misc fixes (the SoftwareRenderer.cpp hunk?) > - 5 toggle - configure.ac + src/Makefile.am Hm, it looks like Jerome never got back to work on these changes... let me try to pick up the ball and run with it. > Couple of small suggestions: > - keep all the sources and headers in the sources lists in > Makefile.sources > - how do you guys manage pthreads - please mention that in the commit > message. > > If I'm reading this correctly, you strip out -pthread and there's no > pthread-stubs on Haiku. Haiku (and BeOS for that matter) has pthread support built into its core libroot.so. No need for -lpthread, all applications can assume its presence. Things that link -lpthread actually fail due to a non-existant libpthread... 
*however* as i'm typing this i'm being told we recently implemented a dummy static libpthread.a to try and appease assumptions about -lpthread existence so i'll remove the pthread checks :-) -- Alex Hi Alex, I have a branch for building haiku with meson, when I was trying to compile neither the scons build nor the autotools build seemed to compile on a Haiku VM instance (x86_64), that was a few months ago though, so maybe it's fixed. Our plan is to remove autotools from mesa, probably this year. I'm thinking if things look pretty good through the 18.0 release cycle I'll probably propose marking autotools as deprecated for 18.1 and propose removal in 18.2. Ah, crap. I just got autoconfig working :-). Historically I have only used SCons for our builds. I always preferred the SCons build since autotools always ends up looking like spaghetti. Here is what our current build does: https://github.com/haikuports/haikuports/blob/master/sys-libs/mesa/mesa-17.1.4.recipe#L52 It looks like Jerome hacked in a patch for autotools... but I've heard some reports of instability with the resulting artifacts. I'm not going to block you guys using autotools or NAK anything, I just want you to be aware that we're trying to consolidate down to just meson and android.mk files. I can respin the haiku patches and CC you if you're interested in looking at them. If Meson is the future, I'm definitely down for helping with (or even taking over) that branch if it is just incomplete Haiku work. I'm going to try and do better maintenance on Haiku Mesa in 2018. I've only been around minimally in 2017 and am a little out of date. You might also want to see if you guys can update your meson, at least last time I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that -pthread and -lpthread are never added by meson. I just installed meson on Haiku and we are currently at 0.43.0 -- Alex ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] ac/nir: remove emission of nir_op_fdiv
Reviewed-by: Bas Nieuwenhuizen On Mon, Feb 5, 2018 at 9:37 PM, Samuel Pitoiset wrote: > RadeonSI and RADV lower fdiv. > > Signed-off-by: Samuel Pitoiset > --- > src/amd/common/ac_nir_to_llvm.c | 5 - > 1 file changed, 5 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index b211832b47..e25bae600f 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -1760,11 +1760,6 @@ static void visit_alu(struct ac_nir_context *ctx, > const nir_alu_instr *instr) > src[1] = ac_to_float(&ctx->ac, src[1]); > result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); > break; > - case nir_op_fdiv: > - src[0] = ac_to_float(&ctx->ac, src[0]); > - src[1] = ac_to_float(&ctx->ac, src[1]); > - result = ac_build_fdiv(&ctx->ac, src[0], src[1]); > - break; > case nir_op_frcp: > src[0] = ac_to_float(&ctx->ac, src[0]); > result = ac_build_fdiv(&ctx->ac, > instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1, > -- > 2.16.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] meson: ensure xmlpool/options.h is generated for libgallium
In file included from ../src/gallium/targets/dri/target.c:1: In file included from ../src/gallium/auxiliary/target-helpers/drm_helper.h:8: ../src/util/xmlpool.h:103:10: fatal error: 'xmlpool/options.h' file not found See also 26bde1e3. Signed-off-by: Jon Turney --- src/gallium/targets/dri/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build index 30368c2152..75ce94ab2c 100644 --- a/src/gallium/targets/dri/meson.build +++ b/src/gallium/targets/dri/meson.build @@ -51,7 +51,7 @@ endif libgallium_dri = shared_library( 'gallium_dri', - files('target.c'), + [files('target.c'), xmlpool_options_h], include_directories : [ inc_common, inc_util, inc_dri_common, inc_gallium_drivers, inc_gallium_winsys, include_directories('../../state_trackers/dri'), -- 2.16.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] util: remove redundant check for the __clang__ macro
In this file there are similar cases with macros PUBLIC, USED and ATTRIBUTE_NOINLINE, before defining which as __attribute__(...), code only checks for __GNUC__. Should I add comments there as well? 2018-02-05 22:51 GMT+02:00 Brian Paul: > On 02/05/2018 01:44 PM, Vlad Golovkin wrote: >> >> Clang defines __GNUC__ macro, so one doesn't need to check __clang__ >> macro in this particular case. > > > Perhaps mention that in a comment below so there's no confusion. > > -Brian > > >> --- >> src/util/macros.h | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/src/util/macros.h b/src/util/macros.h >> index 432d513930..d36ca095d5 100644 >> --- a/src/util/macros.h >> +++ b/src/util/macros.h >> @@ -138,7 +138,7 @@ do { \ >> /* Forced function inlining */ >> #ifndef ALWAYS_INLINE >> -# if defined(__GNUC__) || defined(__clang__) >> +# if defined(__GNUC__) >> #define ALWAYS_INLINE inline __attribute__((always_inline)) >> # elif defined(_MSC_VER) >> #define ALWAYS_INLINE __forceinline >> > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Haiku: convert to autotools
Quoting kallisti5 (2018-02-05 12:58:30) > On 2017-10-24 11:47, Emil Velikov wrote: > > Hi Jerome, > > > > On 23 October 2017 at 16:58, Jerome Duval> > wrote: > >> * configure.ac: > >> -pthread is not available on Haiku. > >> Haiku doesn't require --enable-dri > >> build hgl on Haiku > >> * egl/Makefile.am: define backendfiles for Haiku > >> * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and > >> targets/haiku-softpipe on Haiku. > >> * src/gallium/targets/haiku-softpipe: add Makefile.am > >> * src/gallium/state_trackers/hgl: add Makefile.am > >> * winsys/sw/hgl: add Makefile.am > >> * src/hgl/Makefile.am: add Makefile.am > >> --- > > Thanks for the patch. I think Eric has a point regarding splitting this > > up. > > Here is one way to handle it: > > - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku > > - 2 - src/egl > > - 3 - src/hgl > > - 4 misc fixes (the SoftwareRenderer.cpp hunk?) > > - 5 toggle - configure.ac + src/Makefile.am > > Hm, it looks like Jerome never got back to work on these changes... let > me try to > pick up the ball and run with it. > > > Couple of small suggestions: > > - keep all the sources and headers in the sources lists in > > Makefile.sources > > - how do you guys manage pthreads - please mention that in the commit > > message. > > > > If I'm reading this correctly, you strip out -pthread and there's no > > pthread-stubs on Haiku. > > Haiku (and BeOS for that matter) has pthread support built into its core > libroot.so. > > No need for -lpthread, all applications can assume its presence. Things > that link -lpthread actually fail due to a non-existant libpthread... 
> *however* as i'm typing this i'm being told we recently implemented a > dummy static libpthread.a to try and appease assumptions about -lpthread > existence so i'll remove the pthread checks :-) > > -- Alex Hi Alex, I have a branch for building Haiku with meson. When I was trying to compile, neither the scons build nor the autotools build seemed to compile on a Haiku VM instance (x86_64); that was a few months ago though, so maybe it's fixed. Our plan is to remove autotools from mesa, probably this year. I'm thinking if things look pretty good through the 18.0 release cycle I'll probably propose marking autotools as deprecated for 18.1 and propose removal in 18.2. I'm not going to block you guys using autotools or NAK anything, I just want you to be aware that we're trying to consolidate down to just meson and android.mk files. I can respin the haiku patches and CC you if you're interested in looking at them. You might also want to see if you guys can update your meson, at least last time I checked it was 0.42, and I fixed the pthreads stuff in 0.43 so that -pthread and -lpthread are never added by meson. Dylan signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] ac/nir: remove emission of nir_op_fdiv
Reviewed-by: Timothy Arceri On 06/02/18 07:37, Samuel Pitoiset wrote: RadeonSI and RADV lower fdiv. Signed-off-by: Samuel Pitoiset --- src/amd/common/ac_nir_to_llvm.c | 5 - 1 file changed, 5 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index b211832b47..e25bae600f 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1760,11 +1760,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) src[1] = ac_to_float(&ctx->ac, src[1]); result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); break; - case nir_op_fdiv: - src[0] = ac_to_float(&ctx->ac, src[0]); - src[1] = ac_to_float(&ctx->ac, src[1]); - result = ac_build_fdiv(&ctx->ac, src[0], src[1]); - break; case nir_op_frcp: src[0] = ac_to_float(&ctx->ac, src[0]); result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1, ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/8] drivers/radeonsi:create uvd hevc enc entry
Add UVD hevc encode pipe video codec creation entry Signed-off-by: James Zhu--- src/gallium/drivers/radeonsi/si_uvd.c | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c index 64f2f8e..0dea60d 100644 --- a/src/gallium/drivers/radeonsi/si_uvd.c +++ b/src/gallium/drivers/radeonsi/si_uvd.c @@ -31,6 +31,8 @@ #include "radeon/radeon_vce.h" #include "radeon/radeon_vcn_dec.h" #include "radeon/radeon_vcn_enc.h" +#include "radeon/radeon_uvd_enc.h" +#include "util/u_video.h" /** * creates an video buffer with an UVD compatible memory layout @@ -146,9 +148,16 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context, struct si_context *ctx = (struct si_context *)context; bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false; - if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) - return (vcn) ? radeon_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer) : - si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer); + if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { + if (vcn) { + radeon_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer); + } else { + if (u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_HEVC) + radeon_uvd_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer); + else + si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer); + } + } return (vcn) ? radeon_create_decoder(context, templ) : si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/8] amdgpu/drm:add uvd hevc enc support in amdgpu cs
On 2018-02-05 12:16 PM, James Zhu wrote: Signed-off-by: James Zhu--- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 1927a3a..6f305b7 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -376,6 +376,7 @@ static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs) { return cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD && cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE && + cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC && cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC && cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC; } @@ -818,6 +819,10 @@ static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs, cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD; break; + case RING_UVD_ENC: + cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC; + break; + Please follow previous indentation, use space instead of tab here. Also, the patch name might better be changed to winsys/amdgpu. With those fixed, this patch is Reviewed-by: Boyuan Zhang Thanks, Boyuan case RING_VCE: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE; break; @@ -1533,6 +1538,7 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs, ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4; break; case RING_UVD: + case RING_UVD_ENC: while (rcs->current.cdw & 15) radeon_emit(rcs, 0x8000); /* type2 nop packet */ break; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Haiku: convert to autotools
On 2017-10-24 11:47, Emil Velikov wrote: Hi Jerome, On 23 October 2017 at 16:58, Jerome Duval wrote: * configure.ac: -pthread is not available on Haiku. Haiku doesn't require --enable-dri build hgl on Haiku * egl/Makefile.am: define backendfiles for Haiku * src/gallium/Makefile.am: build winsys/sw/hgl, state_trackers/hgl and targets/haiku-softpipe on Haiku. * src/gallium/targets/haiku-softpipe: add Makefile.am * src/gallium/state_trackers/hgl: add Makefile.am * winsys/sw/hgl: add Makefile.am * src/hgl/Makefile.am: add Makefile.am --- Thanks for the patch. I think Eric has a point regarding splitting this up. Here is one way to handle it: - patch 1 - the driver, aka st/hgl + sw/hgl + targets/haiku - 2 - src/egl - 3 - src/hgl - 4 misc fixes (the SoftwareRenderer.cpp hunk?) - 5 toggle - configure.ac + src/Makefile.am Hm, it looks like Jerome never got back to work on these changes... let me try to pick up the ball and run with it. Couple of small suggestions: - keep all the sources and headers in the sources lists in Makefile.sources - how do you guys manage pthreads - please mention that in the commit message. If I'm reading this correctly, you strip out -pthread and there's no pthread-stubs on Haiku. Haiku (and BeOS for that matter) has pthread support built into its core libroot.so. No need for -lpthread, all applications can assume its presence. Things that link -lpthread actually fail due to a nonexistent libpthread... *however* as i'm typing this i'm being told we recently implemented a dummy static libpthread.a to try and appease assumptions about -lpthread existence so i'll remove the pthread checks :-) -- Alex ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 0/6] Implement common gralloc_handle_t in libdrm
Hey, I haven't seen any feedback, but am looking for an ACK/LGTM. If anyone has a cycle to spare, it would be helpful :) Thanks, Rob. On 01/29/2018 06:37 PM, Robert Foss wrote: This series moves {gbm,drm,cros}_gralloc_handle_t struct to libdrm, since at least 4 implementations exist, and share a lot of contents. The idea is to keep the common stuff defined in one place, and libdrm is the common codebase to all of these platforms. Additionally, having this struct defined in libdrm will make it easier for mesa and gralloc implementations to communicate. A second series is expected to be submitted, which will contain an accessor function implementation that would allow each gralloc implementation to supply its own accessors. Robert Foss (6): android: Move gralloc handle struct to libdrm android: Add version variable to gralloc_handle_t android: Mark gralloc_handle_t magic variable as const android: Remove member name from gralloc_handle_t android: Change gralloc_handle_t format from Android format to fourcc android: Change gralloc_handle_t members to be fixed width Android.mk | 8 +++- Makefile.sources | 3 ++ android/gralloc_handle.h | 109 +++ 3 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 android/gralloc_handle.h ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/8] amd/common:add uvd hevc enc support check in hw query
On 2018-02-05 12:16 PM, James Zhu wrote: Based on amdgpu hardware query information to check if UVD hevc enc support Signed-off-by: James Zhu--- src/amd/common/ac_gpu_info.c | 10 +- src/amd/common/ac_gpu_info.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 6d9dcb5..2494967 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -98,7 +98,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, { struct amdgpu_buffer_size_alignments alignment_info = {}; struct amdgpu_heap_info vram, vram_vis, gtt; - struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {}, vcn_enc = {}; + struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_enc = {}; uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0; int r, i, j; drmDevicePtr devinfo; @@ -166,6 +166,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, return false; } + r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, _enc); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) failed.\n"); + return false; + } + if (info->drm_major == 3 && info->drm_minor >= 17) { r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, _dec); if (r) { @@ -275,6 +281,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, uvd.available_rings ? uvd_version : 0; info->vce_fw_version = vce.available_rings ? vce_version : 0; + info->uvd_enc_supported = + uvd_enc.available_rings ? 
true : false; info->has_userptr = true; info->has_syncobj = has_syncobj(fd); info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20; diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index cca3e98..6b120d1 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -65,6 +65,7 @@ struct radeon_info { uint32_tnum_compute_rings; uint32_tuvd_fw_version; uint32_tvce_fw_version; + booluvd_enc_supported; White space/tab length seems not correct here. With that fixed, this patch is Reviewed-by: Boyuan Zhang Thanks, Boyuan uint32_tme_fw_version; uint32_tme_fw_feature; uint32_tpfp_fw_version; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] util: remove redundant check for the __clang__ macro
On 02/05/2018 01:44 PM, Vlad Golovkin wrote: Clang defines __GNUC__ macro, so one doesn't need to check __clang__ macro in this particular case. Perhaps mention that in a comment below so there's no confusion. -Brian --- src/util/macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/macros.h b/src/util/macros.h index 432d513930..d36ca095d5 100644 --- a/src/util/macros.h +++ b/src/util/macros.h @@ -138,7 +138,7 @@ do { \ /* Forced function inlining */ #ifndef ALWAYS_INLINE -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) #define ALWAYS_INLINE inline __attribute__((always_inline)) # elif defined(_MSC_VER) #define ALWAYS_INLINE __forceinline ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] util: remove redundant check for the __clang__ macro
Clang defines __GNUC__ macro, so one doesn't need to check __clang__ macro in this particular case. --- src/util/macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/macros.h b/src/util/macros.h index 432d513930..d36ca095d5 100644 --- a/src/util/macros.h +++ b/src/util/macros.h @@ -138,7 +138,7 @@ do { \ /* Forced function inlining */ #ifndef ALWAYS_INLINE -# if defined(__GNUC__) || defined(__clang__) +# if defined(__GNUC__) #define ALWAYS_INLINE inline __attribute__((always_inline)) # elif defined(_MSC_VER) #define ALWAYS_INLINE __forceinline -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] ac/nir: remove emission of nir_op_fdiv
RadeonSI and RADV lower fdiv. Signed-off-by: Samuel Pitoiset--- src/amd/common/ac_nir_to_llvm.c | 5 - 1 file changed, 5 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index b211832b47..e25bae600f 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1760,11 +1760,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) src[1] = ac_to_float(>ac, src[1]); result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); break; - case nir_op_fdiv: - src[0] = ac_to_float(>ac, src[0]); - src[1] = ac_to_float(>ac, src[1]); - result = ac_build_fdiv(>ac, src[0], src[1]); - break; case nir_op_frcp: src[0] = ac_to_float(>ac, src[0]); result = ac_build_fdiv(>ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1, -- 2.16.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/8] drivers/radeonsi:create uvd hevc enc entry
On 2018-02-05 12:16 PM, James Zhu wrote: Add UVD hevc encode pipe video codec creation entry Signed-off-by: James Zhu--- src/gallium/drivers/radeonsi/si_uvd.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c index 64f2f8e..fa43a96 100644 --- a/src/gallium/drivers/radeonsi/si_uvd.c +++ b/src/gallium/drivers/radeonsi/si_uvd.c @@ -31,7 +31,8 @@ #include "radeon/radeon_vce.h" #include "radeon/radeon_vcn_dec.h" #include "radeon/radeon_vcn_enc.h" - +#include "radeon/radeon_uvd_enc.h" +#include "util/u_video.h" Could you add back the blank line please? /** * creates an video buffer with an UVD compatible memory layout */ @@ -146,9 +147,13 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context, struct si_context *ctx = (struct si_context *)context; bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false; - if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) - return (vcn) ? radeon_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer) : - si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer); + if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { + if (u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_HEVC) { + return (vcn) ? radeon_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer) : + radeon_uvd_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer); + } else + return si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer); + } It seems that this change will break the original logic for vcn h.264 encode case, please fix it. Thanks, Boyuan return (vcn) ? radeon_create_decoder(context, templ) : si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/8] drivers/radeon:add uvd hevc enc functions
Implement UVD hevc encode functions Signed-off-by: James Zhu--- src/gallium/drivers/radeon/radeon_uvd_enc.c | 340 1 file changed, 340 insertions(+) create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c new file mode 100644 index 000..6eb6cda --- /dev/null +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c @@ -0,0 +1,340 @@ +/** + * + * Copyright 2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **/ + +#include + +#include "pipe/p_video_codec.h" + +#include "util/u_video.h" +#include "util/u_memory.h" + +#include "vl/vl_video_buffer.h" + +#include "radeonsi/si_pipe.h" +#include "radeon_video.h" +#include "radeon_uvd_enc.h" + +static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, struct pipe_h265_enc_picture_desc *picture) +{ + struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; + enc->enc_pic.picture_type = pic->picture_type; + enc->enc_pic.frame_num = pic->frame_num; + enc->enc_pic.pic_order_cnt = pic->pic_order_cnt; + enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type; + enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0; + enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1; + enc->enc_pic.not_referenced = pic->not_referenced; + enc->enc_pic.is_idr = (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_IDR) || +(pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_I); + enc->enc_pic.crop_left = 0; + enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) / 2; + enc->enc_pic.crop_top = 0; + enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2; + enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag; + enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc; + enc->enc_pic.general_level_idc = pic->seq.general_level_idc; + enc->enc_pic.max_poc = pic->seq.intra_period; + enc->enc_pic.log2_max_poc = 0; + for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++) + i = (i >> 1); + enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc; + enc->enc_pic.pic_width_in_luma_samples = pic->seq.pic_width_in_luma_samples; + enc->enc_pic.pic_height_in_luma_samples = pic->seq.pic_height_in_luma_samples; + enc->enc_pic.log2_diff_max_min_luma_coding_block_size = pic->seq.log2_diff_max_min_luma_coding_block_size; + enc->enc_pic.log2_min_transform_block_size_minus2 = pic->seq.log2_min_transform_block_size_minus2; + 
enc->enc_pic.log2_diff_max_min_transform_block_size = pic->seq.log2_diff_max_min_transform_block_size; + enc->enc_pic.max_transform_hierarchy_depth_inter = pic->seq.max_transform_hierarchy_depth_inter; + enc->enc_pic.max_transform_hierarchy_depth_intra = pic->seq.max_transform_hierarchy_depth_intra; + enc->enc_pic.log2_parallel_merge_level_minus2 = pic->pic.log2_parallel_merge_level_minus2; + enc->enc_pic.bit_depth_luma_minus8 = pic->seq.bit_depth_luma_minus8; + enc->enc_pic.bit_depth_chroma_minus8 = pic->seq.bit_depth_chroma_minus8; + enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type; + enc->enc_pic.max_num_merge_cand = pic->slice.max_num_merge_cand; + enc->enc_pic.sample_adaptive_offset_enabled_flag = pic->seq.sample_adaptive_offset_enabled_flag; + enc->enc_pic.pcm_enabled_flag = pic->seq.pcm_enabled_flag; + enc->enc_pic.sps_temporal_mvp_enabled_flag = pic->seq.sps_temporal_mvp_enabled_flag; +} + +static void flush(struct radeon_uvd_encoder
Re: [Mesa-dev] [PATCH] meson: better defaults for osx, windows and cygwin
On 05/02/2018 17:34, Dylan Baker wrote: Quoting Jon Turney (2018-02-03 13:19:20) On 03/02/2018 18:07, Dylan Baker wrote: Quoting Jon Turney (2018-02-03 05:49:40) - if not ['darwin', 'windows'].contains(host_machine.system()) + if not ['darwin', 'windows', 'cygwin'].contains(host_machine.system()) +# TODO: PPC, Sparc if ['x86', 'x86_64'].contains(host_machine.cpu_family()) _drivers = 'i915,i965,r100,r200,nouveau' else error('Unknown architecture. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.') endif else -error('Unknown OS. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.') +# only swrast would make sense here, but gallium swrast is a much better default +_drivers = '' I'm really not a fan of dumping the 'else error' case. This currently means that for example haiku will try to build something that they cannot support. I'd really rather just set appropriate defaults for OSes that are guaranteed supported and still let OSes that haven't been tested fall through to error. I also think that's a nice place for people trying to use mesa meson on a new platform, since they understand we haven't tested on their OS. Good idea. But that's not what the code currently does. If it's not on the list of 'unknown' OSes (darwin, windows), any other OS e.g. haiku gets treated like linux... Attached is a revised patch which is more explicit about what's a known OS. I guess the BSDs probably should be added somewhere, but idk what's appropriate for them. I rather like this patch, so you can add: Reviewed-by: Dylan BakerThanks. The only thing I might do differently is instead of checking for Linux use the `system_has_kms_drm` variable (which covers the BSDs as well as Linux, but I'm okay with landing this as-is and changing that later if that is the right thing to do. Yeah, that seems pretty plausible. 
___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/8] drivers/radeon:add uvd hevc enc functions
On 2018-02-05 01:04 PM, Alex Deucher wrote: On Mon, Feb 5, 2018 at 12:16 PM, James Zhuwrote: Implement UVD hevc encode functions Signed-off-by: James Zhu --- 1 | 21 ++ src/gallium/drivers/radeon/radeon_uvd_enc.c | 340 2 files changed, 361 insertions(+) create mode 100644 1 create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c diff --git a/1 b/1 new file mode 100644 index 000..51dd09e --- /dev/null +++ b/1 @@ -0,0 +1,21 @@ +r c80294d drivers/radeon:Add uvd hevc enc hw interface header +pick 2d924d5 drivers/radeon:add uvd hevc enc hw ib implementation + +# Rebase f2b9031..2d924d5 onto f2b9031 (2 command(s)) +# +# Commands: +# p, pick = use commit +# r, reword = use commit, but edit the commit message +# e, edit = use commit, but stop for amending +# s, squash = use commit, but meld into previous commit +# f, fixup = like "squash", but discard this commit's log message +# x, exec = run command (the rest of the line) using shell +# d, drop = remove commit +# +# These lines can be re-ordered; they are executed from top to bottom. +# +# If you remove a line here THAT COMMIT WILL BE LOST. +# +# However, if you remove everything, the rebase will be aborted. +# +# Note that empty commits are commented out Looks like some garbage got accidently added here. Removed garbage file. send out version 2 patches. diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c new file mode 100644 index 000..6eb6cda --- /dev/null +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c @@ -0,0 +1,340 @@ +/** + * + * Copyright 2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **/ + +#include + +#include "pipe/p_video_codec.h" + +#include "util/u_video.h" +#include "util/u_memory.h" + +#include "vl/vl_video_buffer.h" + +#include "radeonsi/si_pipe.h" +#include "radeon_video.h" +#include "radeon_uvd_enc.h" + +static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, struct pipe_h265_enc_picture_desc *picture) +{ + struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; + enc->enc_pic.picture_type = pic->picture_type; + enc->enc_pic.frame_num = pic->frame_num; + enc->enc_pic.pic_order_cnt = pic->pic_order_cnt; + enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type; + enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0; + enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1; + enc->enc_pic.not_referenced = pic->not_referenced; + enc->enc_pic.is_idr = (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_IDR) || +(pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_I); + enc->enc_pic.crop_left = 0; + enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) / 2; + enc->enc_pic.crop_top = 0; + enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2; + enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag; + enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc; + enc->enc_pic.general_level_idc = pic->seq.general_level_idc; + enc->enc_pic.max_poc = pic->seq.intra_period; + enc->enc_pic.log2_max_poc = 0; + for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++) + i = (i >> 1); + enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc; + enc->enc_pic.pic_width_in_luma_samples = pic->seq.pic_width_in_luma_samples; + enc->enc_pic.pic_height_in_luma_samples = pic->seq.pic_height_in_luma_samples; +
Re: [Mesa-dev] [PATCH] i965: Enable disk shader cache by default
On Sat, Feb 3, 2018 at 2:58 PM, Jordan Justenwrote: > On 2018-02-03 14:24:06, Jason Ekstrand wrote: > > On February 3, 2018 13:59:40 Jordan Justen > wrote: > > > > > Signed-off-by: Jordan Justen > > > Reviewed-by: Timothy Arceri > > > --- > > > docs/relnotes/18.1.0.html | 1 + > > > src/mesa/drivers/dri/i965/brw_disk_cache.c | 3 --- > > > 2 files changed, 1 insertion(+), 3 deletions(-) > > > > > > diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html > > > index b8a0cd0d02c..0a5878ea41f 100644 > > > --- a/docs/relnotes/18.1.0.html > > > +++ b/docs/relnotes/18.1.0.html > > > @@ -46,6 +46,7 @@ Note: some of the new features are only available > with > > > certain drivers. > > > > > > GL_EXT_semaphore on radeonsi > > > GL_EXT_semaphore_fd on radeonsi > > > +Disk shader cache support for i965 enabled by default > > > > > > > > > Bug fixes > > > diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c > > > b/src/mesa/drivers/dri/i965/brw_disk_cache.c > > > index f989456bcde..41f742e858f 100644 > > > --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c > > > +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c > > > @@ -407,9 +407,6 @@ void > > > brw_disk_cache_init(struct intel_screen *screen) > > > { > > > #ifdef ENABLE_SHADER_CACHE > > > - if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", true)) > > > - return; > > > > Should we just flip the default so we still have the environment variable > > to shut it off if we have problems? > > The disk_cache_create function (called later) also looks at the same > variable, and it defaults to enabling the shader cache. > > That's the reason I chose to use this variable name, even though it > has meant that we had to use a double negative > (MESA_GLSL_CACHE_DISABLE=0) to allow the i965 disk shader cache to be > enabled. > Fair enough. I figured it was something like that. I just wanted to double-check. 
:-) --Jason > -Jordan > > > > > > - > > > char renderer[10]; > > > MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), > "i965_%04x", > > > screen->deviceID); > > > -- > > > 2.15.1 > > > > > > ___ > > > mesa-dev mailing list > > > mesa-dev@lists.freedesktop.org > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/4] i965/gen10: Use CS Stall instead of WriteImmediate.
The first 2 patches of this series should be added to branch 18.0 too. On Fri, Jan 26, 2018 at 11:32:38AM -0800, Rafael Antognolli wrote: > Fixes: ca19ee33d7d39cb89d948b1c983763065975ce5b > Signed-off-by: Rafael Antognolli> Cc: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_pipe_control.c | 10 -- > 1 file changed, 4 insertions(+), 6 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c > b/src/mesa/drivers/dri/i965/brw_pipe_control.c > index eb8ada63129..e5b3ffe640c 100644 > --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c > +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c > @@ -353,12 +353,10 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw) > void > gen10_emit_isp_disable(struct brw_context *brw) > { > - const struct gen_device_info *devinfo = >screen->devinfo; > - > - brw_emit_pipe_control_write(brw, > - PIPE_CONTROL_ISP_DIS | > - PIPE_CONTROL_WRITE_IMMEDIATE, > - brw->workaround_bo, 0, 0); > + brw_emit_pipe_control(brw, > + PIPE_CONTROL_ISP_DIS | > + PIPE_CONTROL_CS_STALL, > + NULL, 0, 0); > > brw->vs.base.push_constants_dirty = true; > brw->tcs.base.push_constants_dirty = true; > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] meson: better defaults for osx, windows and cygwin
Quoting Jon Turney (2018-02-03 13:19:20) > On 03/02/2018 18:07, Dylan Baker wrote: > > Quoting Jon Turney (2018-02-03 05:49:40) > >> - if not ['darwin', 'windows'].contains(host_machine.system()) > >> + if not ['darwin', 'windows', 'cygwin'].contains(host_machine.system()) > >> +# TODO: PPC, Sparc > >> if ['x86', 'x86_64'].contains(host_machine.cpu_family()) > >> _drivers = 'i915,i965,r100,r200,nouveau' > >> else > >> error('Unknown architecture. Please pass -Ddri-drivers to set > >> driver options. Patches gladly accepted to fix this.') > >> endif > >> else > >> -error('Unknown OS. Please pass -Ddri-drivers to set driver options. > >> Patches gladly accepted to fix this.') > >> +# only swrast would make sense here, but gallium swrast is a much > >> better default > >> +_drivers = '' > > > > I'm really not a fan of dumping the 'else error' case. This currently means > > that > > for example haiku will try to build something that they cannot support. I'd > > really rather just set appropriate defaults for OSes that are guaranteed > > supported and still let OSes that haven't been tested fall through to > > error. I > > also think that's a nice place for people trying to use mesa meson on a new > > platform, since they understand we haven't tested on their OS. > > Good idea. But that's not what the code currently does. If it's not on > the list of 'unknown' OSes (darwin, windows), any other OS e.g. haiku > gets treated like linux... > > Attached is a revised patch which is more explicit about what's a known > OS. I guess the BSDs probably should be added somewhere, but idk what's > appropriate for them. > > I rather like this patch, so you can add: Reviewed-by: Dylan BakerThe only thing I might do differently is instead of checking for Linux use the `system_has_kms_drm` variable (which covers the BSDs as well as Linux, but I'm okay with landing this as-is and changing that later if that is the right thing to do. 
Dylan signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] i965/nir: do int64 lowering before optimization
On Mon, Feb 5, 2018 at 5:40 AM, Iago Toral Quiroga wrote: > Otherwise loop unrolling will fail to see the actual cost of > the unrolling operations when the loop body contains 64-bit integer > instructions, and especially when the divmod64 lowering applies, > since its lowering is quite expensive. > > Without this change, some in-development CTS tests for int64 > get stuck forever trying to register allocate a shader with > over 50K SSA values. The large number of SSA values is the result > of NIR first unrolling multiple seemingly simple loops that involve > int64 instructions, only to then lower these instructions to produce > a massive pile of code (due to the divmod64 lowering in the unrolled > instructions). > > With this change, loop unrolling will see the loops with the int64 > code already lowered and will realize that it is too expensive to > unroll. > > v2: Run nir_algebraic first so we can hopefully get rid of some of > the int64 instructions before we even attempt to lower them. > --- > > For reference, I captured execution times for the CTS tests that > raised the problem. This is with debug builds of Mesa and CTS so > they are not ideal, but I think they are sufficient to see the > impact of the patch. > > With this patch: 52s > With this v1:56s > With master: 1m:38s (*) > > (*) This is actually a significant improvement that has happened in > master since we sent the original patch. Originally, the tests would > just hang forever trying to compile. Seems like a step in the right direction. Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/8] drivers/radeon:add uvd hevc enc functions
On Mon, Feb 5, 2018 at 12:16 PM, James Zhuwrote: > Implement UVD hevc encode functions > > Signed-off-by: James Zhu > --- > 1 | 21 ++ > src/gallium/drivers/radeon/radeon_uvd_enc.c | 340 > > 2 files changed, 361 insertions(+) > create mode 100644 1 > create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c > > diff --git a/1 b/1 > new file mode 100644 > index 000..51dd09e > --- /dev/null > +++ b/1 > @@ -0,0 +1,21 @@ > +r c80294d drivers/radeon:Add uvd hevc enc hw interface header > +pick 2d924d5 drivers/radeon:add uvd hevc enc hw ib implementation > + > +# Rebase f2b9031..2d924d5 onto f2b9031 (2 command(s)) > +# > +# Commands: > +# p, pick = use commit > +# r, reword = use commit, but edit the commit message > +# e, edit = use commit, but stop for amending > +# s, squash = use commit, but meld into previous commit > +# f, fixup = like "squash", but discard this commit's log message > +# x, exec = run command (the rest of the line) using shell > +# d, drop = remove commit > +# > +# These lines can be re-ordered; they are executed from top to bottom. > +# > +# If you remove a line here THAT COMMIT WILL BE LOST. > +# > +# However, if you remove everything, the rebase will be aborted. > +# > +# Note that empty commits are commented out Looks like some garbage got accidently added here. > diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c > b/src/gallium/drivers/radeon/radeon_uvd_enc.c > new file mode 100644 > index 000..6eb6cda > --- /dev/null > +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c > @@ -0,0 +1,340 @@ > +/** > + * > + * Copyright 2018 Advanced Micro Devices, Inc. > + * All Rights Reserved. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > + * "Software"), to deal in the Software without restriction, including > + * without limitation the rights to use, copy, modify, merge, publish, > + * distribute, sub license, and/or sell copies of the Software, and to > + * permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * The above copyright notice and this permission notice (including the > + * next paragraph) shall be included in all copies or substantial portions > + * of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS > + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. > + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR > + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, > + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE > + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
> + * > + **/ > + > +#include > + > +#include "pipe/p_video_codec.h" > + > +#include "util/u_video.h" > +#include "util/u_memory.h" > + > +#include "vl/vl_video_buffer.h" > + > +#include "radeonsi/si_pipe.h" > +#include "radeon_video.h" > +#include "radeon_uvd_enc.h" > + > +static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, struct > pipe_h265_enc_picture_desc *picture) > +{ > + struct pipe_h265_enc_picture_desc *pic = (struct > pipe_h265_enc_picture_desc *)picture; > + enc->enc_pic.picture_type = pic->picture_type; > + enc->enc_pic.frame_num = pic->frame_num; > + enc->enc_pic.pic_order_cnt = pic->pic_order_cnt; > + enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type; > + enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0; > + enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1; > + enc->enc_pic.not_referenced = pic->not_referenced; > + enc->enc_pic.is_idr = (pic->picture_type == > PIPE_H265_ENC_PICTURE_TYPE_IDR) || > +(pic->picture_type == > PIPE_H265_ENC_PICTURE_TYPE_I); > + enc->enc_pic.crop_left = 0; > + enc->enc_pic.crop_right = (align(enc->base.width, 16) - > enc->base.width) / 2; > + enc->enc_pic.crop_top = 0; > + enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - > enc->base.height) / 2; > + enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag; > + enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc; > + enc->enc_pic.general_level_idc = pic->seq.general_level_idc; > + enc->enc_pic.max_poc = pic->seq.intra_period; > + enc->enc_pic.log2_max_poc = 0; > + for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++) > + i = (i >> 1); > + enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc; > + enc->enc_pic.pic_width_in_luma_samples = > pic->seq.pic_width_in_luma_samples; > +
[Mesa-dev] [Bug 104949] swrast: Epiphany WEB browser core dumps under Mesa 17.3.3
https://bugs.freedesktop.org/show_bug.cgi?id=104949 --- Comment #3 from Daniel Stone --- Yeah, good catch! Bug filed: https://bugs.webkit.org/show_bug.cgi?id=182490 -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: rename gl_vertex_array_object::_VertexAttrib -> _VertexArray
Since the type is gl_vertex_array. Update comment to explain that these arrays are only used by the VBO module. Also rename some local variables in _mesa_update_vao_derived_arrays(). --- src/mesa/main/arrayobj.c | 13 ++--- src/mesa/main/attrib.c| 2 +- src/mesa/main/mtypes.h| 4 ++-- src/mesa/vbo/vbo_exec_array.c | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index 360d097..a6fa33c 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -283,8 +283,8 @@ unbind_array_object_vbos(struct gl_context *ctx, struct gl_vertex_array_object * for (i = 0; i < ARRAY_SIZE(obj->BufferBinding); i++) _mesa_reference_buffer_object(ctx, >BufferBinding[i].BufferObj, NULL); - for (i = 0; i < ARRAY_SIZE(obj->_VertexAttrib); i++) - _mesa_reference_buffer_object(ctx, >_VertexAttrib[i].BufferObj, NULL); + for (i = 0; i < ARRAY_SIZE(obj->_VertexArray); i++) + _mesa_reference_buffer_object(ctx, >_VertexArray[i].BufferObj, NULL); } @@ -453,14 +453,13 @@ _mesa_update_vao_derived_arrays(struct gl_context *ctx, while (arrays) { const int attrib = u_bit_scan(); - struct gl_vertex_array *client_array = >_VertexAttrib[attrib]; - const struct gl_array_attributes *attrib_array = + struct gl_vertex_array *array = >_VertexArray[attrib]; + const struct gl_array_attributes *attribs = >VertexAttrib[attrib]; const struct gl_vertex_buffer_binding *buffer_binding = - >BufferBinding[attrib_array->BufferBindingIndex]; + >BufferBinding[attribs->BufferBindingIndex]; - _mesa_update_vertex_array(ctx, client_array, attrib_array, -buffer_binding); + _mesa_update_vertex_array(ctx, array, attribs, buffer_binding); } } diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index a9e4a11..8ac5db0 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -1503,7 +1503,7 @@ copy_array_object(struct gl_context *ctx, /* skip RefCount */ for (i = 0; i < ARRAY_SIZE(src->VertexAttrib); i++) { - 
_mesa_copy_vertex_array(ctx, >_VertexAttrib[i], >_VertexAttrib[i]); + _mesa_copy_vertex_array(ctx, >_VertexArray[i], >_VertexArray[i]); _mesa_copy_vertex_attrib_array(ctx, >VertexAttrib[i], >VertexAttrib[i]); _mesa_copy_vertex_buffer_binding(ctx, >BufferBinding[i], >BufferBinding[i]); } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 3a67d43..aa083c3 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1603,9 +1603,9 @@ struct gl_vertex_array_object * Derived vertex attribute arrays * * This is a legacy data structure created from gl_array_attributes and -* gl_vertex_buffer_binding, for compatibility with existing driver code. +* gl_vertex_buffer_binding, only used by the VBO module at this time. */ - struct gl_vertex_array _VertexAttrib[VERT_ATTRIB_MAX]; + struct gl_vertex_array _VertexArray[VERT_ATTRIB_MAX]; /** Vertex attribute arrays */ struct gl_array_attributes VertexAttrib[VERT_ATTRIB_MAX]; diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 42759d5..a5bedc8 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -314,7 +314,7 @@ recalculate_input_bindings(struct gl_context *ctx) struct vbo_context *vbo = vbo_context(ctx); struct vbo_exec_context *exec = >exec; const struct gl_vertex_array_object *vao = ctx->Array.VAO; - const struct gl_vertex_array *vertexAttrib = vao->_VertexAttrib; + const struct gl_vertex_array *vertexAttrib = vao->_VertexArray; const struct gl_vertex_array **inputs = >array.inputs[0]; /* May shuffle the position and generic0 bits around */ -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] st/mesa: refactor st_bufferobj_map_range()
Use a new helper function, st_access_flags_to_transfer_flags(), to convert the GL_MAP_x flags to PIPE_TRANSFER_x flags. We'll be able to use this function in a couple other places. --- src/mesa/state_tracker/st_cb_bufferobjects.c | 38 src/mesa/state_tracker/st_cb_bufferobjects.h | 4 +++ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index 6b64ba1..044916b 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -427,17 +427,13 @@ st_bufferobj_invalidate(struct gl_context *ctx, /** - * Called via glMapBufferRange(). + * Convert GLbitfield of GL_MAP_x flags to gallium pipe_transfer_usage flags. + * \param wholeBuffer is the whole buffer being mapped? */ -static void * -st_bufferobj_map_range(struct gl_context *ctx, - GLintptr offset, GLsizeiptr length, GLbitfield access, - struct gl_buffer_object *obj, - gl_map_buffer_index index) +enum pipe_transfer_usage +st_access_flags_to_transfer_flags(GLbitfield access, bool wholeBuffer) { - struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_buffer_object *st_obj = st_buffer_object(obj); - enum pipe_transfer_usage flags = 0x0; + enum pipe_transfer_usage flags = 0; if (access & GL_MAP_WRITE_BIT) flags |= PIPE_TRANSFER_WRITE; @@ -452,7 +448,7 @@ st_bufferobj_map_range(struct gl_context *ctx, flags |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; } else if (access & GL_MAP_INVALIDATE_RANGE_BIT) { - if (offset == 0 && length == obj->Size) + if (wholeBuffer) flags |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; else flags |= PIPE_TRANSFER_DISCARD_RANGE; @@ -473,15 +469,35 @@ st_bufferobj_map_range(struct gl_context *ctx, if (access & MESA_MAP_NOWAIT_BIT) flags |= PIPE_TRANSFER_DONTBLOCK; + return flags; +} + + +/** + * Called via glMapBufferRange(). 
+ */ +static void * +st_bufferobj_map_range(struct gl_context *ctx, + GLintptr offset, GLsizeiptr length, GLbitfield access, + struct gl_buffer_object *obj, + gl_map_buffer_index index) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + assert(offset >= 0); assert(length >= 0); assert(offset < obj->Size); assert(offset + length <= obj->Size); + const enum pipe_transfer_usage transfer_flags = + st_access_flags_to_transfer_flags(access, +offset == 0 && length == obj->Size); + obj->Mappings[index].Pointer = pipe_buffer_map_range(pipe, st_obj->buffer, offset, length, -flags, +transfer_flags, _obj->transfer[index]); if (obj->Mappings[index].Pointer) { obj->Mappings[index].Offset = offset; diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.h b/src/mesa/state_tracker/st_cb_bufferobjects.h index ea77c58..534506a 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.h +++ b/src/mesa/state_tracker/st_cb_bufferobjects.h @@ -55,6 +55,10 @@ st_buffer_object(struct gl_buffer_object *obj) } +enum pipe_transfer_usage +st_access_flags_to_transfer_flags(GLbitfield access, bool wholeBuffer); + + extern void st_init_bufferobject_functions(struct pipe_screen *screen, struct dd_function_table *functions); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] st/mesa: use st_access_flags_to_transfer_flags() helper in more places
--- src/mesa/state_tracker/st_cb_fbo.c | 18 +- src/mesa/state_tracker/st_cb_texture.c | 17 - 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 3a5c03c..0800f5b 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -47,6 +47,7 @@ #include "pipe/p_screen.h" #include "st_atom.h" #include "st_context.h" +#include "st_cb_bufferobjects.h" #include "st_cb_fbo.h" #include "st_cb_flush.h" #include "st_cb_texture.h" @@ -780,7 +781,6 @@ st_MapRenderbuffer(struct gl_context *ctx, struct st_renderbuffer *strb = st_renderbuffer(rb); struct pipe_context *pipe = st->pipe; const GLboolean invert = rb->Name == 0; - unsigned usage; GLuint y2; GLubyte *map; @@ -800,13 +800,13 @@ st_MapRenderbuffer(struct gl_context *ctx, return; } - usage = 0x0; - if (mode & GL_MAP_READ_BIT) - usage |= PIPE_TRANSFER_READ; - if (mode & GL_MAP_WRITE_BIT) - usage |= PIPE_TRANSFER_WRITE; - if (mode & GL_MAP_INVALIDATE_RANGE_BIT) - usage |= PIPE_TRANSFER_DISCARD_RANGE; + /* Check for unexpected flags */ + assert((mode & ~(GL_MAP_READ_BIT | +GL_MAP_WRITE_BIT | +GL_MAP_INVALIDATE_RANGE_BIT)) == 0); + + const enum pipe_transfer_usage transfer_flags = + st_access_flags_to_transfer_flags(mode, false); /* Note: y=0=bottom of buffer while y2=0=top of buffer. 
* 'invert' will be true for window-system buffers and false for @@ -821,7 +821,7 @@ st_MapRenderbuffer(struct gl_context *ctx, strb->texture, strb->surface->u.tex.level, strb->surface->u.tex.first_layer, -usage, x, y2, w, h, >transfer); +transfer_flags, x, y2, w, h, >transfer); if (map) { if (invert) { *rowStrideOut = -(int) strb->transfer->stride; diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 98f2443..6345ead 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -254,19 +254,18 @@ st_MapTextureImage(struct gl_context *ctx, { struct st_context *st = st_context(ctx); struct st_texture_image *stImage = st_texture_image(texImage); - unsigned pipeMode; GLubyte *map; struct pipe_transfer *transfer; - pipeMode = 0x0; - if (mode & GL_MAP_READ_BIT) - pipeMode |= PIPE_TRANSFER_READ; - if (mode & GL_MAP_WRITE_BIT) - pipeMode |= PIPE_TRANSFER_WRITE; - if (mode & GL_MAP_INVALIDATE_RANGE_BIT) - pipeMode |= PIPE_TRANSFER_DISCARD_RANGE; + /* Check for unexpected flags */ + assert((mode & ~(GL_MAP_READ_BIT | +GL_MAP_WRITE_BIT | +GL_MAP_INVALIDATE_RANGE_BIT)) == 0); - map = st_texture_image_map(st, stImage, pipeMode, x, y, slice, w, h, 1, + const enum pipe_transfer_usage transfer_flags = + st_access_flags_to_transfer_flags(mode, false); + + map = st_texture_image_map(st, stImage, transfer_flags, x, y, slice, w, h, 1, ); if (map) { if (st_etc_fallback(st, texImage)) { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] st/mesa: refactor bufferobj_data()
Split out some of the code into three new helper functions: buffer_target_to_bind_flags(), storage_flags_to_buffer_flags(), buffer_usage() to make the code more managable. --- src/mesa/state_tracker/st_cb_bufferobjects.c | 191 +++ 1 file changed, 104 insertions(+), 87 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index a9104a9..6b64ba1 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -164,107 +164,84 @@ st_bufferobj_get_subdata(struct gl_context *ctx, offset, size, data); } -static ALWAYS_INLINE GLboolean -bufferobj_data(struct gl_context *ctx, - GLenum target, - GLsizeiptrARB size, - const void *data, - struct gl_memory_object *memObj, - GLuint64 offset, - GLenum usage, - GLbitfield storageFlags, - struct gl_buffer_object *obj) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; - struct st_buffer_object *st_obj = st_buffer_object(obj); - struct st_memory_object *st_mem_obj = st_memory_object(memObj); - unsigned bind, pipe_usage, pipe_flags = 0; - - if (target != GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD && - size && st_obj->buffer && - st_obj->Base.Size == size && - st_obj->Base.Usage == usage && - st_obj->Base.StorageFlags == storageFlags) { - if (data) { - /* Just discard the old contents and write new data. - * This should be the same as creating a new buffer, but we avoid - * a lot of validation in Mesa. - */ - pipe->buffer_subdata(pipe, st_obj->buffer, - PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE, - 0, size, data); - return GL_TRUE; - } else if (screen->get_param(screen, PIPE_CAP_INVALIDATE_BUFFER)) { - pipe->invalidate_resource(pipe, st_obj->buffer); - return GL_TRUE; - } - } - - st_obj->Base.Size = size; - st_obj->Base.Usage = usage; - st_obj->Base.StorageFlags = storageFlags; +/** + * Return bitmask of PIPE_BIND_x flags corresponding a GL buffer target. 
+ */ +static unsigned +buffer_target_to_bind_flags(GLenum target) +{ switch (target) { case GL_PIXEL_PACK_BUFFER_ARB: case GL_PIXEL_UNPACK_BUFFER_ARB: - bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; - break; + return PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; case GL_ARRAY_BUFFER_ARB: - bind = PIPE_BIND_VERTEX_BUFFER; - break; + return PIPE_BIND_VERTEX_BUFFER; case GL_ELEMENT_ARRAY_BUFFER_ARB: - bind = PIPE_BIND_INDEX_BUFFER; - break; + return PIPE_BIND_INDEX_BUFFER; case GL_TEXTURE_BUFFER: - bind = PIPE_BIND_SAMPLER_VIEW; - break; + return PIPE_BIND_SAMPLER_VIEW; case GL_TRANSFORM_FEEDBACK_BUFFER: - bind = PIPE_BIND_STREAM_OUTPUT; - break; + return PIPE_BIND_STREAM_OUTPUT; case GL_UNIFORM_BUFFER: - bind = PIPE_BIND_CONSTANT_BUFFER; - break; + return PIPE_BIND_CONSTANT_BUFFER; case GL_DRAW_INDIRECT_BUFFER: case GL_PARAMETER_BUFFER_ARB: - bind = PIPE_BIND_COMMAND_ARGS_BUFFER; - break; + return PIPE_BIND_COMMAND_ARGS_BUFFER; case GL_ATOMIC_COUNTER_BUFFER: case GL_SHADER_STORAGE_BUFFER: - bind = PIPE_BIND_SHADER_BUFFER; - break; + return PIPE_BIND_SHADER_BUFFER; case GL_QUERY_BUFFER: - bind = PIPE_BIND_QUERY_BUFFER; - break; + return PIPE_BIND_QUERY_BUFFER; default: - bind = 0; + return 0; } +} - /* Set usage. */ - if (st_obj->Base.Immutable) { + +/** + * Return bitmask of PIPE_RESOURCE_x flags corresponding to GL_MAP_x flags. + */ +static unsigned +storage_flags_to_buffer_flags(GLbitfield storageFlags) +{ + unsigned flags = 0; + if (storageFlags & GL_MAP_PERSISTENT_BIT) + flags |= PIPE_RESOURCE_FLAG_MAP_PERSISTENT; + if (storageFlags & GL_MAP_COHERENT_BIT) + flags |= PIPE_RESOURCE_FLAG_MAP_COHERENT; + if (storageFlags & GL_SPARSE_STORAGE_BIT_ARB) + flags |= PIPE_RESOURCE_FLAG_SPARSE; + return flags; +} + + +/** + * From a buffer object's target, immutability flag, storage flags and + * usage hint, return a pipe_resource_usage value (PIPE_USAGE_DYNAMIC, + * STREAM, etc). 
+ */ +static const enum pipe_resource_usage +buffer_usage(GLenum target, GLboolean immutable, + GLbitfield storageFlags, GLenum usage) +{ + if (immutable) { /* BufferStorage */ if (storageFlags & GL_CLIENT_STORAGE_BIT) { if (storageFlags & GL_MAP_READ_BIT) -pipe_usage = PIPE_USAGE_STAGING; +return PIPE_USAGE_STAGING; else -pipe_usage = PIPE_USAGE_STREAM; +return PIPE_USAGE_STREAM; } else
[Mesa-dev] [Bug 104777] Attaching multiple shader objects for the same stage to a GLSL program triggers a linker error
https://bugs.freedesktop.org/show_bug.cgi?id=104777 Juan A. Suarez changed: What|Removed |Added Resolution|--- |FIXED Status|NEW |RESOLVED --- Comment #3 from Juan A. Suarez --- commit 4195eed961ccfe404ae81b9112189fc93a254ded Author: Juan A. Suarez Romero Date: Mon Feb 5 17:38:39 2018 +0100 glsl/linker: check same name is not used in block and outside According to OpenGL GLSL 3.20 spec, section 4.3.9: "It is a link-time error if any particular shader interface contains: - two different blocks, each having no instance name, and each having a member of the same name, or - a variable outside a block, and a block with no instance name, where the variable has the same name as a member in the block." This fixes a previous commit 9b894c8 ("glsl/linker: link-error using the same name in unnamed block and outside") that covered this case, but did not take into account that precision qualifiers are ignored when comparing blocks with no instance name. With this commit, the original tests KHR-GL*.shaders.uniform_block.common.name_matching remain fixed, and also dEQP-GLES31.functional.shaders.linkage.uniform.block.differing_precision regression is fixed, which was broken by previous commit. v2: use helper variables (Matteo Bruni) Fixes: 9b894c8 ("glsl/linker: link-error using the same name in unnamed block and outside") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104668 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104777 CC: Mark Janes CC: "18.0" Tested-by: Matteo Bruni Reviewed-by: Tapani Pälli Signed-off-by: Juan A. Suarez Romero src/compiler/glsl/linker.cpp | 53 ++--- 1 file changed, 30 insertions(+), 23 deletions(-) -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 8/8] drivers/radeonsi: enable uvd encode for HEVC main
Enable UVD encode for HEVC main profile Signed-off-by: James Zhu --- src/gallium/drivers/radeonsi/si_get.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 8002362..64f76b4 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -24,6 +24,7 @@ #include "si_pipe.h" #include "radeon/radeon_video.h" #include "radeon/radeon_vce.h" +#include "radeon/radeon_uvd_enc.h" #include "ac_llvm_util.h" #include "vl/vl_decoder.h" #include "vl/vl_video_buffer.h" @@ -587,7 +588,8 @@ static int si_get_video_param(struct pipe_screen *screen, (si_vce_is_fw_version_supported(sscreen) || sscreen->info.family == CHIP_RAVEN)) || (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN && - sscreen->info.family == CHIP_RAVEN); + (sscreen->info.family == CHIP_RAVEN || + si_radeon_uvd_enc_supported(sscreen))); case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; case PIPE_VIDEO_CAP_MAX_WIDTH: -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/8] drivers/radeon:add uvd hevc enc functions
Implement UVD hevc encode functions Signed-off-by: James Zhu--- 1 | 21 ++ src/gallium/drivers/radeon/radeon_uvd_enc.c | 340 2 files changed, 361 insertions(+) create mode 100644 1 create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c diff --git a/1 b/1 new file mode 100644 index 000..51dd09e --- /dev/null +++ b/1 @@ -0,0 +1,21 @@ +r c80294d drivers/radeon:Add uvd hevc enc hw interface header +pick 2d924d5 drivers/radeon:add uvd hevc enc hw ib implementation + +# Rebase f2b9031..2d924d5 onto f2b9031 (2 command(s)) +# +# Commands: +# p, pick = use commit +# r, reword = use commit, but edit the commit message +# e, edit = use commit, but stop for amending +# s, squash = use commit, but meld into previous commit +# f, fixup = like "squash", but discard this commit's log message +# x, exec = run command (the rest of the line) using shell +# d, drop = remove commit +# +# These lines can be re-ordered; they are executed from top to bottom. +# +# If you remove a line here THAT COMMIT WILL BE LOST. +# +# However, if you remove everything, the rebase will be aborted. +# +# Note that empty commits are commented out diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c new file mode 100644 index 000..6eb6cda --- /dev/null +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c @@ -0,0 +1,340 @@ +/** + * + * Copyright 2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **/ + +#include + +#include "pipe/p_video_codec.h" + +#include "util/u_video.h" +#include "util/u_memory.h" + +#include "vl/vl_video_buffer.h" + +#include "radeonsi/si_pipe.h" +#include "radeon_video.h" +#include "radeon_uvd_enc.h" + +static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, struct pipe_h265_enc_picture_desc *picture) +{ + struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; + enc->enc_pic.picture_type = pic->picture_type; + enc->enc_pic.frame_num = pic->frame_num; + enc->enc_pic.pic_order_cnt = pic->pic_order_cnt; + enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type; + enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0; + enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1; + enc->enc_pic.not_referenced = pic->not_referenced; + enc->enc_pic.is_idr = (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_IDR) || +(pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_I); + enc->enc_pic.crop_left = 0; + enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) / 2; + enc->enc_pic.crop_top = 0; + enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2; + enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag; + enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc; + enc->enc_pic.general_level_idc = pic->seq.general_level_idc; + enc->enc_pic.max_poc = pic->seq.intra_period; + enc->enc_pic.log2_max_poc = 0; + for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++) + i = (i >> 1); + enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc; + enc->enc_pic.pic_width_in_luma_samples = pic->seq.pic_width_in_luma_samples; + enc->enc_pic.pic_height_in_luma_samples = pic->seq.pic_height_in_luma_samples; + enc->enc_pic.log2_diff_max_min_luma_coding_block_size = pic->seq.log2_diff_max_min_luma_coding_block_size; + enc->enc_pic.log2_min_transform_block_size_minus2 = pic->seq.log2_min_transform_block_size_minus2; +
[Mesa-dev] [PATCH 2/8] amdgpu/drm:add uvd hevc enc support in amdgpu cs
Signed-off-by: James Zhu --- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 1927a3a..6f305b7 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -376,6 +376,7 @@ static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs) { return cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD && cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE && + cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC && cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC && cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC; } @@ -818,6 +819,10 @@ static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs, cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD; break; + case RING_UVD_ENC: + cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC; + break; + case RING_VCE: cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE; break; @@ -1533,6 +1538,7 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs, ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4; break; case RING_UVD: + case RING_UVD_ENC: while (rcs->current.cdw & 15) radeon_emit(rcs, 0x8000); /* type2 nop packet */ break; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/8] amd/common:add uvd hevc enc support check in hw query
Check whether UVD HEVC encode is supported, based on the amdgpu hardware query information. Signed-off-by: James Zhu --- src/amd/common/ac_gpu_info.c | 10 +- src/amd/common/ac_gpu_info.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 6d9dcb5..2494967 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -98,7 +98,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, { struct amdgpu_buffer_size_alignments alignment_info = {}; struct amdgpu_heap_info vram, vram_vis, gtt; - struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {}, vcn_enc = {}; + struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_enc = {}; uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0; int r, i, j; drmDevicePtr devinfo; @@ -166,6 +166,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, return false; } + r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, &uvd_enc); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) failed.\n"); + return false; + } + if (info->drm_major == 3 && info->drm_minor >= 17) { r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, &vcn_dec); if (r) { @@ -275,6 +281,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, uvd.available_rings ? uvd_version : 0; info->vce_fw_version = vce.available_rings ? vce_version : 0; + info->uvd_enc_supported = + uvd_enc.available_rings ? 
true : false; info->has_userptr = true; info->has_syncobj = has_syncobj(fd); info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20; diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index cca3e98..6b120d1 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -65,6 +65,7 @@ struct radeon_info { uint32_tnum_compute_rings; uint32_tuvd_fw_version; uint32_tvce_fw_version; + booluvd_enc_supported; uint32_tme_fw_version; uint32_tme_fw_feature; uint32_tpfp_fw_version; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev