[Mesa-dev] [PATCH 3/3] nir: drop the variable name when serializing
We know NIR can handle this because the shaders coming from spirv don't have names. Dropping the name makes the shaders more generic which can make it better when using the serialized NIR as a key for in memory shader caches such as what we do for radeonsi. Also it just means we can write/read less to and from the disk cache. For now we add a param to keep uniform names as they are still required by the st when assigning uniform locations. --- src/compiler/nir/nir_serialize.c | 29 ++- src/compiler/nir/nir_serialize.h | 3 +- .../drivers/radeonsi/si_state_shaders.c | 2 +- src/intel/vulkan/anv_pipeline_cache.c | 2 +- .../drivers/dri/i965/brw_program_binary.c | 2 +- src/mesa/state_tracker/st_shader_cache.c | 2 +- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c index adcc0cf43df..e30e125d7b7 100644 --- a/src/compiler/nir/nir_serialize.c +++ b/src/compiler/nir/nir_serialize.c @@ -132,12 +132,12 @@ read_constant(read_ctx *ctx, nir_variable *nvar) } static void -write_variable(write_ctx *ctx, const nir_variable *var) +write_variable(write_ctx *ctx, const nir_variable *var, bool keep_name) { write_add_object(ctx, var); encode_type_to_blob(ctx->blob, var->type); - blob_write_uint32(ctx->blob, !!(var->name)); - if (var->name) + blob_write_uint32(ctx->blob, (!!(var->name)) && keep_name); + if (var->name && keep_name) blob_write_string(ctx->blob, var->name); blob_write_bytes(ctx->blob, (uint8_t *) >data, sizeof(var->data)); blob_write_uint32(ctx->blob, var->num_state_slots); @@ -197,11 +197,11 @@ read_variable(read_ctx *ctx) } static void -write_var_list(write_ctx *ctx, const struct exec_list *src) +write_var_list(write_ctx *ctx, const struct exec_list *src, bool keep_names) { blob_write_uint32(ctx->blob, exec_list_length(src)); foreach_list_typed(nir_variable, var, node, src) { - write_variable(ctx, var); + write_variable(ctx, var, keep_names); } } @@ -996,7 +996,7 @@ read_cf_list(read_ctx 
*ctx, struct exec_list *cf_list) static void write_function_impl(write_ctx *ctx, const nir_function_impl *fi) { - write_var_list(ctx, >locals); + write_var_list(ctx, >locals, false); write_reg_list(ctx, >registers); blob_write_uint32(ctx->blob, fi->reg_alloc); @@ -1070,7 +1070,8 @@ read_function(read_ctx *ctx) } void -nir_serialize(struct blob *blob, const nir_shader *nir) +nir_serialize(struct blob *blob, const nir_shader *nir, + bool keep_uniform_names) { write_ctx ctx; ctx.remap_table = _mesa_pointer_hash_table_create(NULL); @@ -1085,12 +1086,12 @@ nir_serialize(struct blob *blob, const nir_shader *nir) info.name = info.label = NULL; blob_write_bytes(blob, (uint8_t *) , sizeof(info)); - write_var_list(, >uniforms); - write_var_list(, >inputs); - write_var_list(, >outputs); - write_var_list(, >shared); - write_var_list(, >globals); - write_var_list(, >system_values); + write_var_list(, >uniforms, keep_uniform_names); + write_var_list(, >inputs, false); + write_var_list(, >outputs, false); + write_var_list(, >shared, false); + write_var_list(, >globals, false); + write_var_list(, >system_values, false); write_reg_list(, >registers); blob_write_uint32(blob, nir->reg_alloc); @@ -1178,7 +1179,7 @@ nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s) struct blob writer; blob_init(); - nir_serialize(, s); + nir_serialize(, s, true); ralloc_free(s); struct blob_reader reader; diff --git a/src/compiler/nir/nir_serialize.h b/src/compiler/nir/nir_serialize.h index f77d8e367ff..41e4de5211e 100644 --- a/src/compiler/nir/nir_serialize.h +++ b/src/compiler/nir/nir_serialize.h @@ -31,7 +31,8 @@ extern "C" { #endif -void nir_serialize(struct blob *blob, const nir_shader *nir); +void nir_serialize(struct blob *blob, const nir_shader *nir, + bool keep_uniform_names); nir_shader *nir_deserialize(void *mem_ctx, const struct nir_shader_compiler_options *options, struct blob_reader *blob); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c index 5bdfd4f6ac1..ea824b278e4 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -58,7 +58,7 @@ void *si_get_ir_binary(struct si_shader_selector *sel) assert(sel->nir); blob_init(); - nir_serialize(, sel->nir); + nir_serialize(, sel->nir, false); ir_binary = blob.data; ir_size = blob.size; } diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index ad1bcf0940f..04d3fdfbbdd 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c
[Mesa-dev] [PATCH 1/3] nir: just drop the register name when serializing
We know NIR can handle this because the shaders coming from spirv don't have names. Dropping the name makes the shaders more generic which can make it better when using the serialized NIR as a key for in memory shader caches such as what we do for radeonsi. Also it just means we can write/read less to and from the disk cache. --- src/compiler/nir/nir_serialize.c | 11 +-- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c index 743eeaed3d5..da41f2ea3f2 100644 --- a/src/compiler/nir/nir_serialize.c +++ b/src/compiler/nir/nir_serialize.c @@ -224,9 +224,6 @@ write_register(write_ctx *ctx, const nir_register *reg) blob_write_uint32(ctx->blob, reg->bit_size); blob_write_uint32(ctx->blob, reg->num_array_elems); blob_write_uint32(ctx->blob, reg->index); - blob_write_uint32(ctx->blob, !!(reg->name)); - if (reg->name) - blob_write_string(ctx->blob, reg->name); blob_write_uint32(ctx->blob, reg->is_global << 1 | reg->is_packed); } @@ -239,13 +236,7 @@ read_register(read_ctx *ctx) reg->bit_size = blob_read_uint32(ctx->blob); reg->num_array_elems = blob_read_uint32(ctx->blob); reg->index = blob_read_uint32(ctx->blob); - bool has_name = blob_read_uint32(ctx->blob); - if (has_name) { - const char *name = blob_read_string(ctx->blob); - reg->name = ralloc_strdup(reg, name); - } else { - reg->name = NULL; - } + reg->name = NULL; unsigned flags = blob_read_uint32(ctx->blob); reg->is_global = flags & 0x2; reg->is_packed = flags & 0x1; -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] nir: drop name and label when serializing
Dropping these makes the shaders more generic which can make it better when using the serialized NIR as a key for in memory shader caches such as what we do for radeonsi. Also it just means we can write/read less to and from the disk cache. --- src/compiler/nir/nir_serialize.c | 17 - 1 file changed, 17 deletions(-) diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c index da41f2ea3f2..adcc0cf43df 100644 --- a/src/compiler/nir/nir_serialize.c +++ b/src/compiler/nir/nir_serialize.c @@ -1082,16 +1082,6 @@ nir_serialize(struct blob *blob, const nir_shader *nir) size_t idx_size_offset = blob_reserve_intptr(blob); struct shader_info info = nir->info; - uint32_t strings = 0; - if (info.name) - strings |= 0x1; - if (info.label) - strings |= 0x2; - blob_write_uint32(blob, strings); - if (info.name) - blob_write_string(blob, info.name); - if (info.label) - blob_write_string(blob, info.label); info.name = info.label = NULL; blob_write_bytes(blob, (uint8_t *) , sizeof(info)); @@ -1140,18 +1130,11 @@ nir_deserialize(void *mem_ctx, ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t)); ctx.next_idx = 0; - uint32_t strings = blob_read_uint32(blob); - char *name = (strings & 0x1) ? blob_read_string(blob) : NULL; - char *label = (strings & 0x2) ? blob_read_string(blob) : NULL; - struct shader_info info; blob_copy_bytes(blob, (uint8_t *) , sizeof(info)); ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL); - info.name = name ? ralloc_strdup(ctx.nir, name) : NULL; - info.label = label ? ralloc_strdup(ctx.nir, label) : NULL; - ctx.nir->info = info; read_var_list(, >uniforms); -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] mesa: Track buffer object use also for VAO usage.
From: Mathias Fröhlich We already track the usage history for buffer objects in a lot of aspects. Add GL_ARRAY_BUFFER and GL_ELEMENT_ARRAY_BUFFER to gl_buffer_object::UsageHistory. Signed-off-by: Mathias Fröhlich --- src/mesa/main/arrayobj.c | 4 +++- src/mesa/main/bufferobj.c | 5 + src/mesa/main/mtypes.h| 4 +++- src/mesa/main/varray.c| 6 -- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index bfd6fce6798..68d30aa9b1f 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -1213,8 +1213,10 @@ vertex_array_element_buffer(struct gl_context *ctx, GLuint vaobj, GLuint buffer, bufObj = ctx->Shared->NullBufferObj; } - if (bufObj) + if (bufObj) { + bufObj->UsageHistory |= USAGE_ELEMENT_ARRAY_BUFFER; _mesa_reference_buffer_object(ctx, >IndexBufferObj, bufObj); + } } diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index f9e52942d47..3caf363b37f 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -113,8 +113,13 @@ get_buffer_target(struct gl_context *ctx, GLenum target) switch (target) { case GL_ARRAY_BUFFER_ARB: + if (ctx->Array.ArrayBufferObj) + ctx->Array.ArrayBufferObj->UsageHistory |= USAGE_ARRAY_BUFFER; return >Array.ArrayBufferObj; case GL_ELEMENT_ARRAY_BUFFER_ARB: + if (ctx->Array.VAO->IndexBufferObj) + ctx->Array.VAO->IndexBufferObj->UsageHistory +|= USAGE_ELEMENT_ARRAY_BUFFER; return >Array.VAO->IndexBufferObj; case GL_PIXEL_PACK_BUFFER_EXT: return >Pack.BufferObj; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 9bca5c153ad..96f30d4a4d5 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1339,7 +1339,9 @@ typedef enum USAGE_SHADER_STORAGE_BUFFER = 0x8, USAGE_TRANSFORM_FEEDBACK_BUFFER = 0x10, USAGE_PIXEL_PACK_BUFFER = 0x20, - USAGE_DISABLE_MINMAX_CACHE = 0x40, + USAGE_ARRAY_BUFFER = 0x40, + USAGE_ELEMENT_ARRAY_BUFFER = 0x80, + USAGE_DISABLE_MINMAX_CACHE = 0x100, } gl_buffer_usage; diff --git 
a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 5af5a7f773f..e6057c7f881 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -209,10 +209,12 @@ _mesa_bind_vertex_buffer(struct gl_context *ctx, binding->Offset = offset; binding->Stride = stride; - if (!_mesa_is_bufferobj(vbo)) + if (!_mesa_is_bufferobj(vbo)) { vao->VertexAttribBufferMask &= ~binding->_BoundArrays; - else + } else { vao->VertexAttribBufferMask |= binding->_BoundArrays; + vbo->UsageHistory |= USAGE_ARRAY_BUFFER; + } vao->NewArrays |= vao->Enabled & binding->_BoundArrays; if (vao == ctx->Array.VAO) -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/2] Track vertex buffer usage for buffer objects.
From: Mathias Fröhlich Hi Brian, I have another VAO optimization aspect for review. Currently gallium just invalidates the array state on every glBufferData-type call. The change adds buffer object usage tracking for vertex buffer objects and avoids invalidating array state on, for example, uniform buffer object data uploads. The change does not introduce piglit regressions on radeonsi. Please review, thanks, Mathias Mathias Fröhlich (2): mesa: Track buffer object use also for VAO usage. st/mesa: Invalidate the gallium array atom only if needed. src/mesa/main/arrayobj.c | 4 +++- src/mesa/main/bufferobj.c| 5 + src/mesa/main/mtypes.h | 4 +++- src/mesa/main/varray.c | 6 -- src/mesa/state_tracker/st_cb_bufferobjects.c | 6 -- 5 files changed, 19 insertions(+), 6 deletions(-) -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] st/mesa: Invalidate the gallium array atom only if needed.
From: Mathias Fröhlich Now that the buffer object usage history tracks if it is being used as a vertex buffer object, we can restrict setting the ST_NEW_VERTEX_ARRAYS bit to dirty on glBufferData calls to buffers that are potentially used as a vertex buffer object. Also put a note that the same could be done for index arrays used in indexed draws. Signed-off-by: Mathias Fröhlich --- src/mesa/state_tracker/st_cb_bufferobjects.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index 5ebe94f4545..b05f2516980 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -357,8 +357,10 @@ bufferobj_data(struct gl_context *ctx, /* The current buffer may be bound, so we have to revalidate all atoms that * might be using it. */ - /* TODO: Add arrays to usage history */ - ctx->NewDriverState |= ST_NEW_VERTEX_ARRAYS; + if (st_obj->Base.UsageHistory & USAGE_ARRAY_BUFFER) + ctx->NewDriverState |= ST_NEW_VERTEX_ARRAYS; + /* if (st_obj->Base.UsageHistory & USAGE_ELEMENT_ARRAY_BUFFER) */ + /*ctx->NewDriverState |= TODO: Handle indices as gallium state; */ if (st_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) ctx->NewDriverState |= ST_NEW_UNIFORM_BUFFER; if (st_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER) -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: Fix off-by-one in box intersection
Am 01.03.19 um 00:28 schrieb Gurchetan Singh: > On Thu, Feb 28, 2019 at 12:39 AM Boris Brezillon > wrote: >> >> Hello Gurchetan, >> >> On Wed, 27 Feb 2019 10:34:26 -0800 >> Gurchetan Singh wrote: >> >>> On Mon, Feb 25, 2019 at 12:35 AM Boris Brezillon >>> wrote: From: Daniel Stone pipe_boxes are x/y + width/height, rather than x0/y0 -> x1/y1. This means that (x+width) is not included in the box. The box intersection check was seemingly written for inclusive regions, and would falsely assert that adjacent boxes would overlap. Fix the off-by-one by being one pixel less greedy. >>> >>> Is there a reason for this change? I only see this used in a warning >>> in the nine state tracker and virgl (where reporting adjacent >>> intersections is preferred). >> >> This patch was part of a series Daniel worked on to optimize texture >> atlas updates on Vivante GPUs [1]. In the end, this work has been put >> on hold because the perf optimization was not as high as expected, but >> it might be resurrected at some point. >> Anyway, back to the point. In this patchset, the pipe_region_overlaps() >> helper needs to check when regions overlap and not when they're >> adjacent. If other users need u_box_test_intersection_2d() to also >> detect when boxes are adjacent, then we should definitely keep the code >> unchanged, but maybe it's worth a comment in the code to clarify the >> behavior. > > Thanks for the information. You can just modify this function to be > something like: > > u_box_test_intersection_2d(const struct pipe_box *a, const struct > pipe_box *b, boolean adjacent_allowed) > > [or add another function --- whatever you prefer] > > That way we can keep behavior for virgl/nine unchanged. I can't see why you'd want to know if the regions are adjacent? If they are adjacent you can still do blits etc. without having to worry about overwriting src regions etc. 
Now for 1d regions (buffers) I could see adjacent being useful - could use that to combine multiple ranges into one for instance. But I don't think you'd want to use a 2d intersect test for that... Roland > >> >> Regards, >> >> Boris >> >> [1]https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.collabora.com%2Fbbrezillon%2Fmesa%2Fcommits%2Fetna-texture-atlas-18.2.4data=02%7C01%7Csroland%40vmware.com%7Ce72daea7c212452556f208d69dd47aa1%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636869933286320567sdata=lHnhl1gM19Gt%2FU3KVv%2FlpBgPXFoSl4BqwZ93yHgbbRQ%3Dreserved=0 >> >>> Signed-off-by: Daniel Stone Signed-off-by: Boris Brezillon --- src/gallium/auxiliary/util/u_box.h | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h index b3f478e7bfc4..ead7189ecaf8 100644 --- a/src/gallium/auxiliary/util/u_box.h +++ b/src/gallium/auxiliary/util/u_box.h @@ -161,15 +161,15 @@ u_box_test_intersection_2d(const struct pipe_box *a, unsigned i; int a_l[2], a_r[2], b_l[2], b_r[2]; - a_l[0] = MIN2(a->x, a->x + a->width); - a_r[0] = MAX2(a->x, a->x + a->width); - a_l[1] = MIN2(a->y, a->y + a->height); - a_r[1] = MAX2(a->y, a->y + a->height); + a_l[0] = MIN2(a->x, a->x + a->width - 1); + a_r[0] = MAX2(a->x, a->x + a->width - 1); + a_l[1] = MIN2(a->y, a->y + a->height - 1); + a_r[1] = MAX2(a->y, a->y + a->height - 1); - b_l[0] = MIN2(b->x, b->x + b->width); - b_r[0] = MAX2(b->x, b->x + b->width); - b_l[1] = MIN2(b->y, b->y + b->height); - b_r[1] = MAX2(b->y, b->y + b->height); + b_l[0] = MIN2(b->x, b->x + b->width - 1); + b_r[0] = MAX2(b->x, b->x + b->width - 1); + b_l[1] = MIN2(b->y, b->y + b->height - 1); + b_r[1] = MAX2(b->y, b->y + b->height - 1); for (i = 0; i < 2; ++i) { if (a_l[i] > b_r[i] || a_r[i] < b_l[i]) -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fmesa-devdata=02%7C01%7Csroland%40vmware.com%7Ce72daea7c212452556f208d69dd47aa1%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636869933286320567sdata=%2FSECNIFewcH6gECXxq94DXvX6QfN8PEKpDQd3h%2Boxz8%3Dreserved=0 >> > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fmesa-devdata=02%7C01%7Csroland%40vmware.com%7Ce72daea7c212452556f208d69dd47aa1%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636869933286320567sdata=%2FSECNIFewcH6gECXxq94DXvX6QfN8PEKpDQd3h%2Boxz8%3Dreserved=0 > ___ mesa-dev mailing list
Re: [Mesa-dev] [Review Request (master branch)] svga: Fix typo s/VGPU10_MAX_FS_INPUTS/VGPU10_MAX_PS_INPUTS/
On 02/28/2019 08:00 PM, Neha Bhende wrote: This caused vmware driver build failure Fixes: 6010d7b8e8 ("gallium: add PIPE_CAP_MAX_VARYINGS") --- src/gallium/drivers/svga/svga_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 6cb5a14..bcb47bb 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -351,7 +351,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return sws->have_sm4_1 ? 1 : 0; /* only single-channel textures */ case PIPE_CAP_MAX_VARYINGS: - return sws->have_vgpu10 ? VGPU10_MAX_FS_INPUTS : 10; + return sws->have_vgpu10 ? VGPU10_MAX_PS_INPUTS : 10; /* Unsupported features */ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: Reviewed-by: Brian Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109805] GPU hangs with error VM_CONTEXT1_PROTECTION_FAULT_STATUS
https://bugs.freedesktop.org/show_bug.cgi?id=109805 --- Comment #1 from rainbowsforthe...@gmail.com --- Created attachment 143504 --> https://bugs.freedesktop.org/attachment.cgi?id=143504&action=edit glxinfo output -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109805] GPU hangs with error VM_CONTEXT1_PROTECTION_FAULT_STATUS
https://bugs.freedesktop.org/show_bug.cgi?id=109805 Bug ID: 109805 Summary: GPU hangs with error VM_CONTEXT1_PROTECTION_FAULT_STATUS Product: Mesa Version: 18.3 Hardware: x86-64 (AMD64) OS: Linux (All) Status: NEW Severity: normal Priority: medium Component: Mesa core Assignee: mesa-dev@lists.freedesktop.org Reporter: rainbowsforthe...@gmail.com QA Contact: mesa-dev@lists.freedesktop.org Created attachment 143503 --> https://bugs.freedesktop.org/attachment.cgi?id=143503&action=edit dmesg output Seemingly at random during intensive games, the graphics will become distorted or corrupted, and this is accompanied by variations on the following errors: [ 7935.967417] amdgpu :23:00.0: GPU fault detected: 146 0x0020c40c for process GTA5.exe pid 17653 thread GTA5.exe pid 17653 [ 7935.967427] amdgpu :23:00.0: VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x0004 [ 7935.967430] amdgpu :23:00.0: VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x0A0C400C [ 7935.967433] amdgpu :23:00.0: VM fault (0x0c, vmid 5, pasid 32781) at page 4, read from 'TC3' (0x54433300) (196) Once this happens, the entire system will eventually lock up after a seemingly random amount of time while the GPU is being used. For example, I could experience the error in GTA V using Proton, then close it and launch a completely different game, and it will eventually freeze, whereas if I have not experienced the error, the game will run fine indefinitely. System specs: CPU: AMD Ryzen R5 1600 Motherboard: MSI B350 Tomahawk RAM: 16GB Corsair Vengeance LPX 3200MHz GPU: 8GB Sapphire RX 480 Nitro+ PSU: 750W Corsair CS750M Distribution: Anarchy Linux rolling 64-bit Kernel: 4.20.13-arch1-1-ARCH OpenGL renderer string: AMD Radeon (TM) RX 480 Graphics (POLARIS10, DRM 3.27.0, 4.20.13-arch1-1-ARCH, LLVM 7.0.1) OpenGL core profile version string: 4.5 (Core Profile) Mesa 18.3.4 Proton version: 3.16-7 Beta DXVK version: 0.96 -- You are receiving this mail because: You are the QA Contact for the bug. 
You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Review Request (master branch)] svga: Fix typo s/VGPU10_MAX_FS_INPUTS/VGPU10_MAX_PS_INPUTS/
This caused vmware driver build failure Fixes: 6010d7b8e8 ("gallium: add PIPE_CAP_MAX_VARYINGS") --- src/gallium/drivers/svga/svga_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 6cb5a14..bcb47bb 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -351,7 +351,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return sws->have_sm4_1 ? 1 : 0; /* only single-channel textures */ case PIPE_CAP_MAX_VARYINGS: - return sws->have_vgpu10 ? VGPU10_MAX_FS_INPUTS : 10; + return sws->have_vgpu10 ? VGPU10_MAX_PS_INPUTS : 10; /* Unsupported features */ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] intel/compiler: implement more algebraic optimizations
On 2/28/19 4:47 AM, Iago Toral wrote: > On Wed, 2019-02-27 at 17:04 -0800, Ian Romanick wrote: >> On 2/27/19 4:45 AM, Iago Toral Quiroga wrote: >>> Now that we propagate constants to the first source of 2src >>> instructions we >>> see more opportunities of constant folding in the backend. >>> >>> Shader-db results on KBL: >>> >>> total instructions in shared programs: 14965607 -> 14855983 (- >>> 0.73%) >>> instructions in affected programs: 3988102 -> 3878478 (-2.75%) >>> helped: 14292 >>> HURT: 59 >>> >>> total cycles in shared programs: 344324295 -> 340656008 (-1.07%) >>> cycles in affected programs: 247527740 -> 243859453 (-1.48%) >>> helped: 14056 >>> HURT: 3314 >>> >>> total loops in shared programs: 4283 -> 4283 (0.00%) >>> loops in affected programs: 0 -> 0 >>> helped: 0 >>> HURT: 0 >>> >>> total spills in shared programs: 27812 -> 24350 (-12.45%) >>> spills in affected programs: 24921 -> 21459 (-13.89%) >>> helped: 345 >>> HURT: 19 >>> >>> total fills in shared programs: 24173 -> 22032 (-8.86%) >>> fills in affected programs: 21124 -> 18983 (-10.14%) >>> helped: 355 >>> HURT: 25 >> >> Ignore my previous questions about nir_opt_constant_folding after >> nir_opt_algebraic_late. I had done that because I added a bunch of >> things to nir_opt_algebraic_late that created my constant folding >> opportunities. >> >> This is the combined changes for this patch and the previous >> patch. 
For >> this patch alone, I got: >> >> total instructions in shared programs: 15306213 -> 15221518 (-0.55%) >> instructions in affected programs: 2911451 -> 2826756 (-2.91%) >> helped: 13121 >> HURT: 44 >> helped stats (abs) min: 1 max: 51 x̄: 6.66 x̃: 6 >> helped stats (rel) min: <.01% max: 16.67% x̄: 4.27% x̃: 3.30% >> HURT stats (abs) min: 3 max: 453 x̄: 61.16 x̃: 5 >> HURT stats (rel) min: 0.20% max: 151.00% x̄: 31.57% x̃: 19.23% >> 95% mean confidence interval for instructions value: -6.61 -6.26 >> 95% mean confidence interval for instructions %-change: -4.23% -4.07% >> Instructions are helped. >> >> total cycles in shared programs: 375419164 -> 372829148 (-0.69%) >> cycles in affected programs: 146769299 -> 144179283 (-1.76%) >> helped: 10992 >> HURT: 1833 >> helped stats (abs) min: 1 max: 56127 x̄: 250.29 x̃: 18 >> helped stats (rel) min: <.01% max: 40.52% x̄: 3.11% x̃: 2.58% >> HURT stats (abs) min: 1 max: 1718 x̄: 87.93 x̃: 42 >> HURT stats (rel) min: <.01% max: 139.33% x̄: 7.74% x̃: 3.08% >> 95% mean confidence interval for cycles value: -248.21 -155.69 >> 95% mean confidence interval for cycles %-change: -1.67% -1.44% >> Cycles are helped. >> >> total spills in shared programs: 28828 -> 28888 (0.21%) >> spills in affected programs: 2037 -> 2097 (2.95%) >> helped: 0 >> HURT: 24 >> >> total fills in shared programs: 35542 -> 35639 (0.27%) >> fills in affected programs: 3078 -> 3175 (3.15%) >> helped: 2 >> HURT: 26 >> >> I decided to look at some of the hurt shaders... it looks like some >> of >> the Unigine geometry shaders really took a beating (+150% >> instructions). >> Note the "max" in the "instructions in affected programs" above. 
> > I am seeing quite different results on my KBL laptop: > > total instructions in shared programs: 14945933 -> 14858158 (-0.59%) > instructions in affected programs: 2842901 -> 2755126 (-3.09%) > helped: 13196 > HURT: 5 > > instructions HURT: shaders/closed/steam/deus-ex-mankind- > divided/274.shader_test CS SIMD8: 1535 -> 1538 (0.20%) > instructions HURT: shaders/closed/steam/deus-ex-mankind- > divided/184.shader_test CS SIMD8: 1535 -> 1538 (0.20%) > instructions HURT: shaders/dolphin/ubershaders/147.shader_test FS > SIMD8: 3481 -> 3491 (0.29%) > instructions HURT: shaders/dolphin/ubershaders/156.shader_test FS > SIMD8: 3465 -> 3475 (0.29%) > instructions HURT: shaders/dolphin/ubershaders/138.shader_test FS > SIMD8: 3465 -> 3475 (0.29%) > > Did you test on a different gen? Can you paste here the paths of some > of the GS shaders where you see the big regressions so I can verify I > have them in my shader-db? > > Also, how did you test this patch exactly? When I was going to capture > the reference shader-db results for patch 2 in this series so I could > extract the results for patch 3 by comparing against it, I noticed that > patch 2 would create constant folding scenarios (for example for ADD > and MUL) that, before this patch, would hit an assertion in the driver > since the algebraic pass only expects to find these opportunities for F > types and will assert on that, so I guess you noticed this and fixed it > before taking your numbers? I ran it through my usual shader-db gauntlet that runs shader-db at each commit for SKL, BDW, HSW, IVB, SNB, ILK, and GM45. *But* since one pass of that takes a really, really long time, I only run release builds with -march=native and all the other tricks. None of the assertions would exist in that run. If patch 2 creates possible assertion failures, the two patches should probably be re-ordered or the previous
Re: [Mesa-dev] [PATCH] gallium/util: Fix off-by-one in box intersection
On Thu, Feb 28, 2019 at 12:39 AM Boris Brezillon wrote: > > Hello Gurchetan, > > On Wed, 27 Feb 2019 10:34:26 -0800 > Gurchetan Singh wrote: > > > On Mon, Feb 25, 2019 at 12:35 AM Boris Brezillon > > wrote: > > > > > > From: Daniel Stone > > > > > > pipe_boxes are x/y + width/height, rather than x0/y0 -> x1/y1. This > > > means that (x+width) is not included in the box. > > > > > > The box intersection check was seemingly written for inclusive regions, > > > and would falsely assert that adjacent boxes would overlap. > > > > > > Fix the off-by-one by being one pixel less greedy. > > > > Is there a reason for this change? I only see this used in a warning > > in the nine state tracker and virgl (where reporting adjacent > > intersections is preferred). > > This patch was part of a series Daniel worked on to optimize texture > atlas updates on Vivante GPUs [1]. In the end, this work has been put > on hold because the perf optimization was not as high as expected, but > it might be resurrected at some point. > Anyway, back to the point. In this patchset, the pipe_region_overlaps() > helper needs to check when regions overlap and not when they're > adjacent. If other users need u_box_test_intersection_2d() to also > detect when boxes are adjacent, then we should definitely keep the code > unchanged, but maybe it's worth a comment in the code to clarify the > behavior. Thanks for the information. You can just modify this function to be something like: u_box_test_intersection_2d(const struct pipe_box *a, const struct pipe_box *b, boolean adjacent_allowed) [or add another function --- whatever you prefer] That way we can keep behavior for virgl/nine unchanged. 
> > Regards, > > Boris > > [1]https://gitlab.collabora.com/bbrezillon/mesa/commits/etna-texture-atlas-18.2.4 > > > > > > > > > Signed-off-by: Daniel Stone > > > Signed-off-by: Boris Brezillon > > > --- > > > src/gallium/auxiliary/util/u_box.h | 16 > > > 1 file changed, 8 insertions(+), 8 deletions(-) > > > > > > diff --git a/src/gallium/auxiliary/util/u_box.h > > > b/src/gallium/auxiliary/util/u_box.h > > > index b3f478e7bfc4..ead7189ecaf8 100644 > > > --- a/src/gallium/auxiliary/util/u_box.h > > > +++ b/src/gallium/auxiliary/util/u_box.h > > > @@ -161,15 +161,15 @@ u_box_test_intersection_2d(const struct pipe_box *a, > > > unsigned i; > > > int a_l[2], a_r[2], b_l[2], b_r[2]; > > > > > > - a_l[0] = MIN2(a->x, a->x + a->width); > > > - a_r[0] = MAX2(a->x, a->x + a->width); > > > - a_l[1] = MIN2(a->y, a->y + a->height); > > > - a_r[1] = MAX2(a->y, a->y + a->height); > > > + a_l[0] = MIN2(a->x, a->x + a->width - 1); > > > + a_r[0] = MAX2(a->x, a->x + a->width - 1); > > > + a_l[1] = MIN2(a->y, a->y + a->height - 1); > > > + a_r[1] = MAX2(a->y, a->y + a->height - 1); > > > > > > - b_l[0] = MIN2(b->x, b->x + b->width); > > > - b_r[0] = MAX2(b->x, b->x + b->width); > > > - b_l[1] = MIN2(b->y, b->y + b->height); > > > - b_r[1] = MAX2(b->y, b->y + b->height); > > > + b_l[0] = MIN2(b->x, b->x + b->width - 1); > > > + b_r[0] = MAX2(b->x, b->x + b->width - 1); > > > + b_l[1] = MIN2(b->y, b->y + b->height - 1); > > > + b_r[1] = MAX2(b->y, b->y + b->height - 1); > > > > > > for (i = 0; i < 2; ++i) { > > >if (a_l[i] > b_r[i] || a_r[i] < b_l[i]) > > > -- > > > 2.20.1 > > > > > > ___ > > > mesa-dev mailing list > > > mesa-dev@lists.freedesktop.org > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109443] Build failure with MSVC when using Scons >= 3.0.2
https://bugs.freedesktop.org/show_bug.cgi?id=109443 --- Comment #10 from William Deegan --- If you'd like to be notified when the fix gets merged into SCons please add yourself to: https://github.com/SCons/scons/pull/3311 Hopefully I'll get this wrapped up (need a test to cover this) and into master this weekend and it'll make it into the next release (3.0.5 or 3.1 are the most likely version strings) -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109615] 19.0.0_rc4 fails u_format_test on ppc64
https://bugs.freedesktop.org/show_bug.cgi?id=109615 Matt Turner changed: What|Removed |Added Blocks|109535 | --- Comment #6 from Matt Turner --- Let's remove from the 19.0 release tracker. Referenced Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=109535 [Bug 109535] [Tracker] Mesa 19.0 release tracker -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109535] [Tracker] Mesa 19.0 release tracker
https://bugs.freedesktop.org/show_bug.cgi?id=109535 Matt Turner changed: What|Removed |Added Depends on|109615 | Referenced Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=109615 [Bug 109615] 19.0.0_rc4 fails u_format_test on ppc64 -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109443] Build failure with MSVC when using Scons >= 3.0.2
https://bugs.freedesktop.org/show_bug.cgi?id=109443 Jose Fonseca changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED Component|glsl-compiler |Mesa core QA Contact|intel-3d-bugs@lists.freedes |mesa-dev@lists.freedesktop. |ktop.org|org --- Comment #9 from Jose Fonseca --- Workaround pushed. Thanks everybody. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/5] RadeonSI: Displayable DCC for Ravens
Hi, This series enables DCC for scanout on Ravens. It requires kernel driver version >= 3.31.0 and my xf86-video-amdgpu patch. There is one issue to resolve: Steam crashes in addrlib/ComputeDccAddrFromCoord. Please review, Thanks, Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] radeonsi: add support for displayable DCC for 1 RB chips
From: Marek Olšák This is the simpler codepath - just disable RB and pipe alignment for DCC. --- src/amd/common/ac_gpu_info.c | 2 + src/amd/common/ac_gpu_info.h | 3 + src/amd/common/ac_surface.c| 25 +++- src/amd/common/ac_surface.h| 2 +- src/gallium/drivers/radeon/radeon_winsys.h | 6 ++ src/gallium/drivers/radeonsi/si_texture.c | 74 -- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 8 +++ 7 files changed, 113 insertions(+), 7 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index fc8c6a09d2f..a6d249a6d2f 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -496,20 +496,22 @@ void ac_print_gpu_info(struct radeon_info *info) info->pci_domain, info->pci_bus, info->pci_dev, info->pci_func); printf("pci_id = 0x%x\n", info->pci_id); printf("family = %i\n", info->family); printf("chip_class = %i\n", info->chip_class); printf("num_compute_rings = %u\n", info->num_compute_rings); printf("num_sdma_rings = %i\n", info->num_sdma_rings); printf("clock_crystal_freq = %i\n", info->clock_crystal_freq); printf("tcc_cache_line_size = %u\n", info->tcc_cache_line_size); + printf("use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned); + printf("Memory info:\n"); printf("pte_fragment_size = %u\n", info->pte_fragment_size); printf("gart_page_size = %u\n", info->gart_page_size); printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(info->gart_size, 1024*1024)); printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024*1024)); printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024*1024)); printf("gds_size = %u kB\n", info->gds_size / 1024); printf("gds_gfx_partition_size = %u kB\n", info->gds_gfx_partition_size / 1024); printf("max_alloc_size = %i MB\n", (int)DIV_ROUND_UP(info->max_alloc_size, 1024*1024)); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index b1ef9c53734..99fed520618 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ 
-49,20 +49,23 @@ struct radeon_info { /* Device info. */ const char *name; uint32_tpci_id; enum radeon_family family; enum chip_class chip_class; uint32_tnum_compute_rings; uint32_tnum_sdma_rings; uint32_tclock_crystal_freq; uint32_ttcc_cache_line_size; + /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */ + booluse_display_dcc_unaligned; + /* Memory info. */ uint32_tpte_fragment_size; uint32_tgart_page_size; uint64_tgart_size; uint64_tvram_size; uint64_tvram_vis_size; unsignedgds_size; unsignedgds_gfx_partition_size; uint64_tmax_alloc_size; uint32_tmin_alloc_size; diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 91004e032a3..6802ab2badb 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -471,21 +471,22 @@ static unsigned cik_get_macro_tile_index(struct radeon_surf *surf) assert(index < 16); return index; } static bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf) { unsigned num_channels = config->info.num_channels; unsigned bpe = surf->bpe; - if (surf->flags & RADEON_SURF_SCANOUT && + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && + surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 && surf->blk_h == 1) { /* subsampled */ if (surf->blk_w == 2 && surf->blk_h == 1) return true; if (/* RGBA8 or RGBA16F */ (bpe >= 4 && bpe <= 8 && num_channels == 4) || /* R5G6B5 or R5G5B5A1 */ (bpe == 2 && num_channels >= 3) || @@ -1208,21 +1209,21 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, din.numFrags = in->numFrags; din.numMipLevels = in->numMipLevels; din.dataSurfaceSize = out.surfSize; ret = Addr2ComputeDccInfo(addrlib, , ); if (ret != ADDR_OK) return
[Mesa-dev] [PATCH 4/5] radeonsi: add support for displayable DCC for multi-RB chips
From: Marek Olšák A compute shader is used to reorder DCC data from aligned to unaligned. --- src/amd/common/ac_gpu_info.c | 1 + src/amd/common/ac_gpu_info.h | 3 + src/amd/common/ac_surface.c | 125 -- src/amd/common/ac_surface.h | 15 ++- src/gallium/drivers/radeonsi/cik_sdma.c | 3 +- src/gallium/drivers/radeonsi/si_blit.c| 3 + .../drivers/radeonsi/si_compute_blit.c| 80 +++ src/gallium/drivers/radeonsi/si_pipe.c| 2 + src/gallium/drivers/radeonsi/si_pipe.h| 15 ++- .../drivers/radeonsi/si_shaderlib_tgsi.c | 73 ++ src/gallium/drivers/radeonsi/si_texture.c | 83 +++- 11 files changed, 389 insertions(+), 14 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index a6d249a6d2f..d890172227c 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -497,20 +497,21 @@ void ac_print_gpu_info(struct radeon_info *info) info->pci_dev, info->pci_func); printf("pci_id = 0x%x\n", info->pci_id); printf("family = %i\n", info->family); printf("chip_class = %i\n", info->chip_class); printf("num_compute_rings = %u\n", info->num_compute_rings); printf("num_sdma_rings = %i\n", info->num_sdma_rings); printf("clock_crystal_freq = %i\n", info->clock_crystal_freq); printf("tcc_cache_line_size = %u\n", info->tcc_cache_line_size); printf("use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned); + printf("use_display_dcc_with_retile_blit = %u\n", info->use_display_dcc_with_retile_blit); printf("Memory info:\n"); printf("pte_fragment_size = %u\n", info->pte_fragment_size); printf("gart_page_size = %u\n", info->gart_page_size); printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(info->gart_size, 1024*1024)); printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024*1024)); printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024*1024)); printf("gds_size = %u kB\n", info->gds_size / 1024); printf("gds_gfx_partition_size = %u kB\n", info->gds_gfx_partition_size / 1024); printf("max_alloc_size = %i MB\n", 
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 99fed520618..5241c28f2a7 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -49,22 +49,25 @@ struct radeon_info { /* Device info. */ const char *name; uint32_tpci_id; enum radeon_family family; enum chip_class chip_class; uint32_tnum_compute_rings; uint32_tnum_sdma_rings; uint32_tclock_crystal_freq; uint32_ttcc_cache_line_size; + /* There are 2 display DCC codepaths, because display expects unaligned DCC. */ /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */ booluse_display_dcc_unaligned; + /* Allocate both aligned and unaligned DCC and use the retile blit. */ + booluse_display_dcc_with_retile_blit; /* Memory info. */ uint32_tpte_fragment_size; uint32_tgart_page_size; uint64_tgart_size; uint64_tvram_size; uint64_tvram_vis_size; unsignedgds_size; unsignedgds_gfx_partition_size; uint64_tmax_alloc_size; diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 6802ab2badb..7225317f3e7 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -1072,20 +1072,21 @@ gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, ret = Addr2GetPreferredSurfaceSetting(addrlib, , ); if (ret != ADDR_OK) return ret; *swizzle_mode = sout.swizzleMode; return 0; } static int gfx9_compute_miptree(ADDR_HANDLE addrlib, + const struct radeon_info *info, const struct ac_surf_config *config, struct radeon_surf *surf, bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in) { ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {}; ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; ADDR_E_RETURNCODE ret; out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT); out.pMipInfo = mip_info; @@ -1209,21 +1210,20 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
[Mesa-dev] [PATCH 5/5] radeonsi: enable displayable DCC on Ravens
From: Marek Olšák --- src/amd/common/ac_gpu_info.c | 8 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 4 2 files changed, 12 insertions(+) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index d890172227c..c53335bbb7d 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -451,20 +451,28 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, ib_align = MAX2(ib_align, dma.ib_start_alignment); ib_align = MAX2(ib_align, uvd.ib_start_alignment); ib_align = MAX2(ib_align, uvd_enc.ib_start_alignment); ib_align = MAX2(ib_align, vce.ib_start_alignment); ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment); ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment); ib_align = MAX2(ib_align, vcn_jpeg.ib_start_alignment); assert(ib_align); info->ib_start_alignment = ib_align; + if (info->drm_minor >= 31 && + (info->family == CHIP_RAVEN || +info->family == CHIP_RAVEN2)) { + if (info->num_render_backends == 1) + info->use_display_dcc_unaligned = true; + else + info->use_display_dcc_with_retile_blit = true; + } return true; } void ac_compute_driver_uuid(char *uuid, size_t size) { char amd_uuid[] = "AMD-MESA-DRV"; assert(size >= sizeof(amd_uuid)); memset(uuid, 0, size); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c index d3a57f6b4f3..35a585a5693 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -38,20 +38,24 @@ #include "radv_amdgpu_cs.h" #include "radv_amdgpu_bo.h" #include "radv_amdgpu_surface.h" static bool do_winsys_init(struct radv_amdgpu_winsys *ws, int fd) { if (!ac_query_gpu_info(fd, ws->dev, >info, >amdinfo)) return false; + /* temporary */ + ws->info.use_display_dcc_unaligned = false; + ws->info.use_display_dcc_with_retile_blit = false; + ws->addrlib = amdgpu_addr_create(>info, >amdinfo, >info.max_alignment); if (!ws->addrlib) { fprintf(stderr, "amdgpu: Cannot create 
addrlib.\n"); return false; } ws->info.num_sdma_rings = MIN2(ws->info.num_sdma_rings, MAX_RINGS_PER_TYPE); ws->info.num_compute_rings = MIN2(ws->info.num_compute_rings, MAX_RINGS_PER_TYPE); ws->use_ib_bos = ws->info.chip_class >= CIK; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] radeonsi: add ability to bind images as image buffers
From: Marek Olšák so that we can bind DCC (texture) as an image buffer. --- src/gallium/drivers/radeonsi/si_descriptors.c | 9 ++--- src/gallium/drivers/radeonsi/si_pipe.h| 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 0f22c55723c..ce67bdb87c8 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -662,38 +662,40 @@ si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot) images->enabled_mask &= ~(1u << slot); ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); } } static void si_mark_image_range_valid(const struct pipe_image_view *view) { struct si_resource *res = si_resource(view->resource); - assert(res && res->b.b.target == PIPE_BUFFER); + if (res->b.b.target != PIPE_BUFFER) + return; util_range_add(>valid_buffer_range, view->u.buf.offset, view->u.buf.offset + view->u.buf.size); } static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_image_view *view, bool skip_decompress, uint32_t *desc, uint32_t *fmask_desc) { struct si_screen *screen = ctx->screen; struct si_resource *res; res = si_resource(view->resource); - if (res->b.b.target == PIPE_BUFFER) { + if (res->b.b.target == PIPE_BUFFER || + view->shader_access & SI_IMAGE_ACCESS_AS_BUFFER) { if (view->access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(view); si_make_buffer_descriptor(screen, res, view->format, view->u.buf.offset, view->u.buf.size, desc); si_set_buf_desc_address(res, view->u.buf.offset, desc + 4); } else { static const unsigned char swizzle[4] = { 0, 1, 2, 3 }; @@ -780,21 +782,22 @@ static void si_set_shader_image(struct si_context *ctx, return; } res = si_resource(view->resource); if (>views[slot] != view) util_copy_image_view(>views[slot], view); si_set_shader_image_desc(ctx, view, skip_decompress, desc, NULL); - if (res->b.b.target == 
PIPE_BUFFER) { + if (res->b.b.target == PIPE_BUFFER || + view->shader_access & SI_IMAGE_ACCESS_AS_BUFFER) { images->needs_color_decompress_mask &= ~(1 << slot); res->bind_history |= PIPE_BIND_SHADER_IMAGE; } else { struct si_texture *tex = (struct si_texture *)res; unsigned level = view->u.tex.level; if (color_needs_decompression(tex)) { images->needs_color_decompress_mask |= 1 << slot; } else { images->needs_color_decompress_mask &= ~(1 << slot); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 39152587a99..6765dcb3275 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -116,20 +116,22 @@ enum si_clear_code { DCC_CLEAR_COLOR_ = 0x, DCC_CLEAR_COLOR_0001 = 0x40404040, DCC_CLEAR_COLOR_1110 = 0x80808080, DCC_CLEAR_COLOR_ = 0xC0C0C0C0, DCC_CLEAR_COLOR_REG= 0x20202020, DCC_UNCOMPRESSED = 0x, }; +#define SI_IMAGE_ACCESS_AS_BUFFER (1 << 7) + /* Debug flags. */ enum { /* Shader logging options: */ DBG_VS = PIPE_SHADER_VERTEX, DBG_PS = PIPE_SHADER_FRAGMENT, DBG_GS = PIPE_SHADER_GEOMETRY, DBG_TCS = PIPE_SHADER_TESS_CTRL, DBG_TES = PIPE_SHADER_TESS_EVAL, DBG_CS = PIPE_SHADER_COMPUTE, DBG_NO_IR, -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] radeonsi/gfx9: add support for PIPE_ALIGNED=0
From: Marek Olšák Needed by displayable DCC. We need to flush L2 after rendering if PIPE_ALIGNED=0 and DCC is enabled. --- src/gallium/drivers/radeonsi/si_blit.c| 7 --- .../drivers/radeonsi/si_compute_blit.c| 9 +++-- src/gallium/drivers/radeonsi/si_pipe.h| 6 -- src/gallium/drivers/radeonsi/si_state.c | 20 ++- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index f39cb5d143f..7613a63e3cb 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -414,21 +414,21 @@ si_decompress_depth(struct si_context *sctx, */ si_make_DB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, inplace_planes & PIPE_MASK_S, tc_compat_htile); } /* set_framebuffer_state takes care of coherency for single-sample. * The DB->CB copy uses CB for the final writes. */ if (copy_planes && tex->buffer.b.b.nr_samples > 1) si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, - false); + false, true /* no DCC */); } static void si_decompress_sampler_depth_textures(struct si_context *sctx, struct si_samplers *textures) { unsigned i; unsigned mask = textures->needs_depth_decompress_mask; while (mask) { @@ -527,21 +527,22 @@ static void si_blit_decompress_color(struct si_context *sctx, /* The texture will always be dirty if some layers aren't flushed. * I don't think this case occurs often though. */ if (first_layer == 0 && last_layer >= max_layer) { tex->dirty_level_mask &= ~(1 << level); } } sctx->decompression_enabled = false; si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, - vi_dcc_enabled(tex, first_level)); + vi_dcc_enabled(tex, first_level), + tex->surface.u.gfx9.dcc.pipe_aligned); } static void si_decompress_color_texture(struct si_context *sctx, struct si_texture *tex, unsigned first_level, unsigned last_level) { /* CMASK or DCC can be discarded and we can still end up here. 
*/ if (!tex->cmask_buffer && !tex->surface.fmask_size && !tex->dcc_offset) return; @@ -1069,21 +1070,21 @@ static void si_do_CB_resolve(struct si_context *sctx, si_blitter_begin(sctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, info->src.resource, info->src.box.z, ~0, sctx->custom_blend_resolve, format); si_blitter_end(sctx); /* Flush caches for possible texturing. */ - si_make_CB_shader_coherent(sctx, 1, false); + si_make_CB_shader_coherent(sctx, 1, false, true /* no DCC */); } static bool do_hardware_msaa_resolve(struct pipe_context *ctx, const struct pipe_blit_info *info) { struct si_context *sctx = (struct si_context*)ctx; struct si_texture *src = (struct si_texture*)info->src.resource; struct si_texture *dst = (struct si_texture*)info->dst.resource; MAYBE_UNUSED struct si_texture *stmp; unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index f5e9c02dd10..2ce56d6a81a 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -317,21 +317,25 @@ void si_compute_copy_image(struct si_context *sctx, unsigned depth = src_box->depth; unsigned data[] = {src_box->x, src_box->y, src_box->z, 0, dstx, dsty, dstz, 0}; if (width == 0 || height == 0) return; si_compute_internal_begin(sctx); sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); - si_make_CB_shader_coherent(sctx, dst->nr_samples, true); + + /* src and dst have the same number of samples. */ + si_make_CB_shader_coherent(sctx, src->nr_samples, true, + /* Only src can have DCC.*/ + ((struct si_texture*)src)->surface.u.gfx9.dcc.pipe_aligned); struct pipe_constant_buffer saved_cb = {};
[Mesa-dev] [Bug 109803] page not found is showing
https://bugs.freedesktop.org/show_bug.cgi?id=109803 Bug ID: 109803 Summary: page not found is showing Product: Mesa Version: 7.4 Hardware: Other OS: Windows (All) Status: NEW Severity: normal Priority: medium Component: GLUT Assignee: mesa-dev@lists.freedesktop.org Reporter: sdindi...@gmail.com QA Contact: mesa-dev@lists.freedesktop.org -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering
On Thu, Feb 28, 2019 at 11:13 AM Marc-André Lureau < marcandre.lur...@gmail.com> wrote: > Hi Eero! > > (ex-colleagues, long time ago!) > > On Thu, Feb 28, 2019 at 1:37 PM Eero Tamminen > wrote: > > > > Hi, > > > > On 28.2.2019 11.57, Marc-André Lureau wrote: > > > On Thu, Feb 28, 2019 at 1:17 AM Marek Olšák wrote: > > >> I'd rather have something more robust than an env var, like catching > SIGSYS. > > > > SIGSYS is info for the invoking parent, not the (Mesa) process doing the > > syscall. > > > > From "man 2 seccomp": > > > > The process terminates as though killed by a SIGSYS signal. Even if a > > signal handler has been registered for SIGSYS, the handler will be > > ignored in this case and the process always terminates. To a parent > > process that is waiting on this process (using waitpid(2) or similar), > > the returned wstatus will indicate that its child was terminated as > > though by a SIGSYS signal. > > > > > > > With current qemu in most distros, it defaults to SIGSYS (we switched > > > away from SCMP_ACT_KILL, which had other problems). With more recent > > > qemu/libseccomp, it will default to SCMP_ACT_KILL_PROCESS. In those > > > KILL action cases, mesa will not be able to catch the failing > > > syscalls. > > > > Qemu / libvirt isn't the only thing using seccomp. > > > > For example Docker enables seccomp filters (along with capability > > restrictions) for the invoked containers unless that is explicitly > > disabled: > > https://docs.docker.com/engine/security/seccomp/ > > > > What actually gets filtered, is trivially changeable on Docker command > > line by giving a JSON file specifying the syscall filtering. > > > > Default policy seems to be white-listing affinity syscall: > > > https://github.com/moby/moby/blob/master/profiles/seccomp/default.json > > > > > > Why distro versions of Qemu filter sched_setaffinity() syscall? 
> > > > > > (https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1815889) > > Daniel Berrange (berrange) wrote on 2019-02-27: #19 > > "IMHO that mesa change is not valid. It is settings its affinity to > run on all threads which is definitely *NOT* something we want to be > allowed. Management applications want to control which CPUs QEMU runs > on, and as such Mesa should honour the CPU placement that the QEMU > process has. > > This is a great example of why QEMU wants to use seccomp to block > affinity changes to prevent something silently trying to use more CPUs > than are assigned to this QEMU." > Mesa uses thread affinity to optimize memory access performance on some CPUs (see util_pin_thread_to_L3). Other places in Mesa need to restore the original thread affinity for some child threads. Additionally, if games limit the thread affinity, Mesa needs to restore the full thread affinity for some of its child threads. In essence, the thread affinity should only be considered a hint for the kernel for optimal performance. There is no reason to kill the process if it's disallowed. Just ignore the call or modify the thread mask to make it legal. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] scons: Workaround failures with MSVC when using SCons 3.0.[2-4].
On 02/28/2019 03:03 AM, Jose Fonseca wrote: This change applies the workaround suggested by Bill Deegan on the affected SCons versions. It also adds a comment with the URL explaining why we were customizing the decider and max_drift in the first place, as I had forgotten all about it. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109443 Tested-by: liviupro...@yahoo.com --- scons/gallium.py | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scons/gallium.py b/scons/gallium.py index 963834a5fbc..efe32e06c6c 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -308,7 +308,13 @@ def generate(env): if env.GetOption('num_jobs') <= 1: env.SetOption('num_jobs', num_jobs()) -env.Decider('MD5-timestamp') +# Speed up dependency checking. See +# - https://github.com/SCons/scons/wiki/GoFastButton +# - https://bugs.freedesktop.org/show_bug.cgi?id=109443 +scons_version = distutils.version.StrictVersion(SCons.__version__) +if scons_version < distutils.version.StrictVersion('3.0.2') or \ + scons_version > distutils.version.StrictVersion('3.0.4'): +env.Decider('MD5-timestamp') env.SetOption('max_drift', 60) # C preprocessor options LGTM. Reviewed-by: Brian Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109131] cc1plus: error: unrecognized command line option "-std=c++11"
https://bugs.freedesktop.org/show_bug.cgi?id=109131 Dylan Baker changed: What|Removed |Added Blocks|109535 | --- Comment #5 from Dylan Baker --- I haven't seen any other reports of this, so I'm going to remove it from the 19.0 blocker. I don't think it's worth it to block the release. Referenced Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=109535 [Bug 109535] [Tracker] Mesa 19.0 release tracker -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109535] [Tracker] Mesa 19.0 release tracker
https://bugs.freedesktop.org/show_bug.cgi?id=109535 Dylan Baker changed: What|Removed |Added Depends on|109131 | Referenced Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=109131 [Bug 109131] cc1plus: error: unrecognized command line option "-std=c++11" -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH 1/2] st/nine: Ignore window size if error
On 28/02/2019 12:54, Emil Velikov wrote: On Wed, 27 Feb 2019 at 22:49, Axel Davy wrote: Check GetWindowInfo and ignore the computed sizes if there is an error. Fixes the regression caused by: commit 2318ca68bbeb4fa6e21a4d8c650cec3f64246596 "st/nine: Handle window resize when a presentation buffer is used" when using old wine gallium nine patches Related issues: https://github.com/iXit/Mesa-3D/issues/331 https://github.com/iXit/Mesa-3D/issues/332 Fixes also crash at window destruction. Cc: mesa-sta...@lists.freedesktop.org Signed-off-by: Axel Davy --- Nittiest of nits: the following takes 1/3 the cognitive effort. Nittiest of nits do matter. Thank you for the suggestion, I shall replace the commit message with your suggestion! Check GetWindowInfo and ignore the computed sizes if there is an error. Fixes a regression caused by earlier commit when using old wine gallium nine patches. Should also address a crash at window destruction. Related issues: https://github.com/iXit/Mesa-3D/issues/331 https://github.com/iXit/Mesa-3D/issues/332 Cc: mesa-sta...@lists.freedesktop.org Fixes: 2318ca68bbe ("st/nine: Handle window resize when a presentation buffer is used") Signed-off-by: Axel Davy HTH -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] scons: Workaround failures with MSVC when using SCons 3.0.[2-4].
Am 28.02.19 um 11:03 schrieb Jose Fonseca: > This change applies the workaround suggested by Bill Deegan on the > affected SCons versions. > > It also adds a comment with the URL explaining why we were > customizing the decider and max_drift in the first place, as I had > forgotten all about it. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109443 > Tested-by: liviupro...@yahoo.com > --- > scons/gallium.py | 8 +++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/scons/gallium.py b/scons/gallium.py > index 963834a5fbc..efe32e06c6c 100755 > --- a/scons/gallium.py > +++ b/scons/gallium.py > @@ -308,7 +308,13 @@ def generate(env): > if env.GetOption('num_jobs') <= 1: > env.SetOption('num_jobs', num_jobs()) > > -env.Decider('MD5-timestamp') > +# Speed up dependency checking. See > +# - https://github.com/SCons/scons/wiki/GoFastButton > +# - https://bugs.freedesktop.org/show_bug.cgi?id=109443 > +scons_version = distutils.version.StrictVersion(SCons.__version__) > +if scons_version < distutils.version.StrictVersion('3.0.2') or \ > + scons_version > distutils.version.StrictVersion('3.0.4'): > +env.Decider('MD5-timestamp') > env.SetOption('max_drift', 60) > > # C preprocessor options > Reviewed-by: Roland Scheidegger ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v4 34/40] intel/compiler: validate region restrictions for half-float conversions
Iago Toral writes: > On Wed, 2019-02-27 at 13:47 -0800, Francisco Jerez wrote: >> Iago Toral writes: >> >> > On Tue, 2019-02-26 at 14:54 -0800, Francisco Jerez wrote: >> > > Iago Toral Quiroga writes: >> > > >> > > > --- >> > > > src/intel/compiler/brw_eu_validate.c| 64 - >> > > > src/intel/compiler/test_eu_validate.cpp | 122 >> > > > >> > > > 2 files changed, 185 insertions(+), 1 deletion(-) >> > > > >> > > > diff --git a/src/intel/compiler/brw_eu_validate.c >> > > > b/src/intel/compiler/brw_eu_validate.c >> > > > index 000a05cb6ac..203641fecb9 100644 >> > > > --- a/src/intel/compiler/brw_eu_validate.c >> > > > +++ b/src/intel/compiler/brw_eu_validate.c >> > > > @@ -531,7 +531,69 @@ >> > > > general_restrictions_based_on_operand_types(const struct >> > > > gen_device_info *devinf >> > > > exec_type_size == 8 && dst_type_size == 4) >> > > >dst_type_size = 8; >> > > > >> > > > - if (exec_type_size > dst_type_size) { >> > > > + /* From the BDW+ PRM: >> > > > +* >> > > > +*"There is no direct conversion from HF to DF or DF to >> > > > HF. >> > > > +* There is no direct conversion from HF to Q/UQ or >> > > > Q/UQ to >> > > > HF." >> > > > +*/ >> > > > + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, >> > > > inst); >> > > > + ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV >> > > > && >> > > >> > > Why is only the MOV instruction handled here and below? Aren't >> > > other >> > > instructions able to do implicit conversions? Probably means you >> > > need >> > > to deal with two sources rather than one. >> > >> > This comes from the programming notes of the MOV instruction >> > (Volume >> > 2a, Command Reference - Instructions - MOV), so it is described >> > specifically for the MOV instruction. I should probably have made >> > this >> > clear in the comment. 
>> > >> >> Maybe the one above is specified in the MOV page only, probably due >> to >> an oversight (If these restrictions were really specific to the MOV >> instruction, what would prevent you from implementing such >> conversions >> through a different instruction? E.g. "ADD dst:df, src:hf, 0" which >> would be substantially more efficient than what you're doing in PATCH >> 02) > > Instructions that take HF can only be strictly HF or mix F and HF > (mixed mode float), with MOV being the only exception. That means that > any instruction like the one you use above are invalid. Maybe we should > validate explicitly that instructions that use HF are strictly HF or > mixed-float mode only? > So you're acknowledging that the conversions checked above are illegal whether the instruction is a MOV or something else. Where are we preventing instructions other than MOV with such conversions from being accepted by the validator? >> but I see other restriction checks in this patch which are >> certainly specified in the generic regioning restrictions page and >> you're limiting to the MOV instruction... > > There are two rules below: > > 1. The one about conversions between integer and half-float. Again, > these can only happen through MOV for the same reasons, so I think this > one should be fine. > Why do you think that can only happen through a MOV instruction? The hardware spec lists the following as a valid example in the register region restrictions page: | add (8) r10.0<2>:hf r11.0<8;8,1>:w r12.0<8;8,1>:w > 2. The one about word destinations (of which we are only really > implementing conversions from F->HF). Here the rule is more generic and > I agree that expanding this to include any other mixed float mode > instruction would make sense. 
However, validation for mixed float mode > has its own set rules, some of which are incompatible with the general > region restrictions being validated here, so I think it is inconvenient > to try and do that validation here (see patch 36 and then patch 37). > What I would propose, if you agree, is that we only implement this for > MOV here, and then for mixed float mode instructions, we implement the > more generic version of this check (that would then go in patch 37). > How does that sound? > I still don't understand why you want to implement the same restriction twice, once for MOV and once for all other mixed-mode instructions. How is that more convenient? >> > > > +((dst_type == BRW_REGISTER_TYPE_HF && >> > > > type_sz(src0_type) == 8) || >> > > > + (dst_type_size == 8 && src0_type == >> > > > BRW_REGISTER_TYPE_HF)), >> > > > +"There are no direct conversion between 64-bit >> > > > types >> > > > and HF"); >> > > > + >> > > > + /* From the BDW+ PRM: >> > > > +* >> > > > +* "Conversion between Integer and HF (Half Float) must >> > > > be >> > > > +*DWord-aligned and strided by a DWord on the >> > > > destination." >> > > > +* >> > > > +* But this seems to be expanded on CHV and SKL+ by: >> > > > +* >> > > > +
Re: [Mesa-dev] [PATCH v5 33/40] intel/compiler: also set F execution type for mixed float mode in BDW
Iago Toral writes: > On Wed, 2019-02-27 at 15:44 -0800, Francisco Jerez wrote: >> Iago Toral Quiroga writes: >> >> > The section 'Execution Data Types' of 3D Media GPGPU volume, which >> > describes execution types, is exactly the same in BDW and SKL+. >> > >> > Also, this section states that there is a single execution type, so >> > it >> > makes sense that this is the wider of the two floating point types >> > involved in mixed float mode, which is what we do for SKL+ and CHV. >> > >> > v2: >> > - Make sure we also account for the destination type in mixed mode >> > (Curro). >> > --- >> > src/intel/compiler/brw_eu_validate.c | 39 +--- >> > >> > 1 file changed, 24 insertions(+), 15 deletions(-) >> > >> > diff --git a/src/intel/compiler/brw_eu_validate.c >> > b/src/intel/compiler/brw_eu_validate.c >> > index 358a0347a93..e0010f0fb07 100644 >> > --- a/src/intel/compiler/brw_eu_validate.c >> > +++ b/src/intel/compiler/brw_eu_validate.c >> > @@ -348,6 +348,17 @@ is_unsupported_inst(const struct >> > gen_device_info *devinfo, >> > return brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)) >> > == NULL; >> > } >> > >> > +/** >> > + * Returns whether a combination of two types would qualify as >> > mixed float >> > + * operation mode >> > + */ >> > +static inline bool >> > +types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1) >> > +{ >> > + return (t0 == BRW_REGISTER_TYPE_F && t1 == >> > BRW_REGISTER_TYPE_HF) || >> > + (t1 == BRW_REGISTER_TYPE_F && t0 == >> > BRW_REGISTER_TYPE_HF); >> > +} >> > + >> > static enum brw_reg_type >> > execution_type_for_type(enum brw_reg_type type) >> > { >> > @@ -390,20 +401,24 @@ execution_type(const struct gen_device_info >> > *devinfo, const brw_inst *inst) >> > enum brw_reg_type src0_exec_type, src1_exec_type; >> > >> > /* Execution data type is independent of destination data type, >> > except in >> > -* mixed F/HF instructions on CHV and SKL+. >> > +* mixed F/HF instructions. 
>> > */ >> > enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo, >> > inst); >> > >> > src0_exec_type = >> > execution_type_for_type(brw_inst_src0_type(devinfo, inst)); >> > if (num_sources == 1) { >> > - if ((devinfo->gen >= 9 || devinfo->is_cherryview) && >> > - src0_exec_type == BRW_REGISTER_TYPE_HF) { >> > + if (src0_exec_type == BRW_REGISTER_TYPE_HF) >> > return dst_exec_type; >> > - } >> >return src0_exec_type; >> > } >> > >> > src1_exec_type = >> > execution_type_for_type(brw_inst_src1_type(devinfo, inst)); >> > + if (types_are_mixed_float(src0_exec_type, src1_exec_type) || >> > + types_are_mixed_float(src0_exec_type, dst_exec_type) || >> > + types_are_mixed_float(src1_exec_type, dst_exec_type)) { >> > + return BRW_REGISTER_TYPE_F; >> > + } >> > + >> > if (src0_exec_type == src1_exec_type) >> >return src0_exec_type; >> > >> > @@ -431,18 +446,12 @@ execution_type(const struct gen_device_info >> > *devinfo, const brw_inst *inst) >> > src1_exec_type == BRW_REGISTER_TYPE_DF) >> >return BRW_REGISTER_TYPE_DF; >> > >> > - if (devinfo->gen >= 9 || devinfo->is_cherryview) { >> > - if (dst_exec_type == BRW_REGISTER_TYPE_F || >> > - src0_exec_type == BRW_REGISTER_TYPE_F || >> > - src1_exec_type == BRW_REGISTER_TYPE_F) { >> > - return BRW_REGISTER_TYPE_F; >> > - } else { >> > - return BRW_REGISTER_TYPE_HF; >> > - } >> > - } >> > + if (src0_exec_type == BRW_REGISTER_TYPE_F || >> > + src1_exec_type == BRW_REGISTER_TYPE_F) >> > + return BRW_REGISTER_TYPE_F; >> > >> > - assert(src0_exec_type == BRW_REGISTER_TYPE_F); >> > - return BRW_REGISTER_TYPE_F; >> > + assert(src0_exec_type == BRW_REGISTER_TYPE_HF); >> > + return BRW_REGISTER_TYPE_HF; >> >> Not really convinced the function is fully correct, but it should be >> strictly better with this patch: > > Is it because of this patch in particular or are you talking about the > function in general? > Talking about the function in general, patch looks okay to me. 
>> Acked-by: Francisco Jerez >> >> > } >> > >> > /** >> > -- >> > 2.17.1 signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109535] [Tracker] Mesa 19.0 release tracker
https://bugs.freedesktop.org/show_bug.cgi?id=109535 Bug 109535 depends on bug 109055, which changed state. Bug 109055 Summary: ~10% perf drop in Sascha Willems Vulkan Multithreading demo https://bugs.freedesktop.org/show_bug.cgi?id=109055 What|Removed |Added Status|NEW |RESOLVED Resolution|--- |WONTFIX -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering
Hi, Just catching up on this thread now. My main question is where is issue occurring? Is it some sort of CI system or something along those lines? We don't really consider SWR in an emulated environment to be an intended use case. Generally it is used as the rendering backend for data visualization, which is typically running on the host OS. -Alok > -Original Message- > From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On > Behalf Of Marc-André Lureau > Sent: Thursday, February 28, 2019 10:14 AM > To: Tamminen, Eero T > Cc: ML mesa-dev > Subject: Re: [Mesa-dev] [PATCH] RFC: Workaround for > pthread_setaffinity_np() seccomp filtering > > Hi Eero! > > (ex-colleagues, long time ago!) > > On Thu, Feb 28, 2019 at 1:37 PM Eero Tamminen > wrote: > > > > Hi, > > > > On 28.2.2019 11.57, Marc-André Lureau wrote: > > > On Thu, Feb 28, 2019 at 1:17 AM Marek Olšák > wrote: > > >> I'd rather have something more robust than an env var, like catching > SIGSYS. > > > > SIGSYS is info for the invoking parent, not the (Mesa) process doing > > the syscall. > > > > From "man 2 seccomp": > > > > The process terminates as though killed by a SIGSYS signal. Even if a > > signal handler has been registered for SIGSYS, the handler will be > > ignored in this case and the process always terminates. To a parent > > process that is waiting on this process (using waitpid(2) or similar), > > the returned wstatus will indicate that its child was terminated as > > though by a SIGSYS signal. > > > > > > > With current qemu in most distros, it defaults to SIGSYS (we > > > switched away from SCMP_ACT_KILL, which had other problems). With > > > more recent qemu/libseccomp, it will default to > > > SCMP_ACT_KILL_PROCESS. In those KILL action cases, mesa will not be > > > able to catch the failing syscalls. > > > > Qemu / libvirt isn't the only thing using seccomp. 
> > > > For example Docker enables seccomp filters (along with capability > > restrictions) for the invoked containers unless that is explicitly > > disabled: > > https://docs.docker.com/engine/security/seccomp/ > > > > What actually gets filtered, is trivially changeable on Docker command > > line by giving a JSON file specifying the syscall filtering. > > > > Default policy seems to be white-listing affinity syscall: > > > > > https://github.com/moby/moby/blob/master/profiles/seccomp/default.jso > n > > > > > > Why distro versions of Qemu filter sched_setaffinity() syscall? > > > > > > (https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1815889) > > Daniel Berrange (berrange) wrote on 2019-02-27: #19 > > "IMHO that mesa change is not valid. It is settings its affinity to run on all > threads which is definitely *NOT* something we want to be allowed. > Management applications want to control which CPUs QEMU runs on, and as > such Mesa should honour the CPU placement that the QEMU process has. > > This is a great example of why QEMU wants to use seccomp to block affinity > changes to prevent something silently trying to use more CPUs than are > assigned to this QEMU." > > > > -- > Marc-André Lureau > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109791] The mesa release config doesn't define NDEBUG when building using meson 0.45.0
https://bugs.freedesktop.org/show_bug.cgi?id=109791 --- Comment #3 from asimiklit --- I suggested a patch to add the warning: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/358 -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering
Hi Eero! (ex-colleagues, long time ago!) On Thu, Feb 28, 2019 at 1:37 PM Eero Tamminen wrote: > > Hi, > > On 28.2.2019 11.57, Marc-André Lureau wrote: > > On Thu, Feb 28, 2019 at 1:17 AM Marek Olšák wrote: > >> I'd rather have something more robust than an env var, like catching > >> SIGSYS. > > SIGSYS is info for the invoking parent, not the (Mesa) process doing the > syscall. > > From "man 2 seccomp": > > The process terminates as though killed by a SIGSYS signal. Even if a > signal handler has been registered for SIGSYS, the handler will be > ignored in this case and the process always terminates. To a parent > process that is waiting on this process (using waitpid(2) or similar), > the returned wstatus will indicate that its child was terminated as > though by a SIGSYS signal. > > > > With current qemu in most distros, it defaults to SIGSYS (we switched > > away from SCMP_ACT_KILL, which had other problems). With more recent > > qemu/libseccomp, it will default to SCMP_ACT_KILL_PROCESS. In those > > KILL action cases, mesa will not be able to catch the failing > > syscalls. > > Qemu / libvirt isn't the only thing using seccomp. > > For example Docker enables seccomp filters (along with capability > restrictions) for the invoked containers unless that is explicitly > disabled: > https://docs.docker.com/engine/security/seccomp/ > > What actually gets filtered, is trivially changeable on Docker command > line by giving a JSON file specifying the syscall filtering. > > Default policy seems to be white-listing affinity syscall: > https://github.com/moby/moby/blob/master/profiles/seccomp/default.json > > > Why distro versions of Qemu filter sched_setaffinity() syscall? > > (https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1815889) Daniel Berrange (berrange) wrote on 2019-02-27: #19 "IMHO that mesa change is not valid. It is settings its affinity to run on all threads which is definitely *NOT* something we want to be allowed. 
Management applications want to control which CPUs QEMU runs on, and as such Mesa should honour the CPU placement that the QEMU process has. This is a great example of why QEMU wants to use seccomp to block affinity changes to prevent something silently trying to use more CPUs than are assigned to this QEMU." -- Marc-André Lureau ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109532] ir_variable has maximum access out of bounds -- but it's not out of bounds
https://bugs.freedesktop.org/show_bug.cgi?id=109532 --- Comment #50 from asimiklit --- (In reply to Ian Romanick from comment #49) > After discussing https://github.com/KhronosGroup/OpenGL-API/issues/46 in the > Khronos call today, I realized that my thinking about this bug may have been > slightly incorrect. I believe that there are two separate issues here. > > 1. The issue with the array type and the maximum index. > > 2. The way the bindings are assigned to the elements that are used. > > No matter what happens, if the shader says > >layout(packed, binding = 3) buffer Block >{ >float b[1]; >} block[4]; > > Then the thing accessed in the shader as block[2].b[0] **must** be at > binding point 5 (from the API). The user as explicitly set that to binding > 5 by the declaration in the shader, so we absolutely have to respect that. > Had the application not explicitly set the bindings, I think we would be > free to assign whatever values we wanted. I believe that means the bindings > set by the CTS in the problematic test are correct. > > I think we also cannot reuse the intermediate bindings. The app may have > expectations that those bindings are for elements of block, and it may > blindly bind buffers to those bindings. If those bindings are used for > other things, only problems can result. I think this means we effectively > cannot eliminate array elements from buffer block arrays that have explicit > bindings. 85% sure, anyway. Ian could you please confirm if I understood everything correctly and clarify the **implicit case** too. 1. So everything is clear when user explicitly set binding point, in this case we should not optimize anything at all. 2. But looks like we are not 100% sure what to do with the **implicit case**. Let's consider options: layout(packed) buffer Block { float b[1]; } block[6]; (The shader uses block[1] and block[4]) 2.1. As it works now. Eliminate any unused array elements and shrink array to reuse binding points. 
(Note: will not pass deqp test) + we spend just 2 binding points for Block[1], Block[4] - it very unexpected behavior for user because: Block[0] is eliminated Block[1] binding point is 0 Block[2] is eliminated Block[3] is eliminated Block[4] binding point is 1 Block[5] is eliminated 2.2. Find first and last used elements and eliminate any before the first and after the last. This option was suggested by Ilia, Ian do you agree it? (Note: looks like should pass deqp test) + we spend 4 of 6 binding points - it better than 2.1 but looks like still not very expected behavior for user: Block[0] is eliminated Block[1] binding point is 0 Block[2] binding point is 1 Block[3] binding point is 2 Block[4] binding point is 3 Block[5] is eliminated 2.3. Find just last used element and eliminate any after the last. + we spend 5 of 6 binding points (Note: but if there is a big array with a big unused tail it should helps) +- quite expected behavior for user: Block[0] binding point is 0 Block[1] binding point is 1 Block[2] binding point is 2 Block[3] binding point is 3 Block[4] binding point is 4 Block[5] is eliminated 2.4. Avoid elimination at all like for explicitly case + very expected behavior for user - we spend 6 of 6 binding points Block[0] binding point is 0 Block[1] binding point is 1 Block[2] binding point is 2 Block[3] binding point is 3 Block[4] binding point is 4 Block[5] binding point is 5 I created MR for it: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/332 It has WIP status because we aren't reached an agreement yet. But anyway I will be glad to see any advices there. -- You are receiving this mail because: You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 3/3] egl/sl: use kms_swrast with vgem instead of a random GPU
On Tue, 19 Feb 2019 at 16:00, Eric Engestrom wrote: > > On Tuesday, 2019-02-19 14:08:08 +, Emil Velikov wrote: > > From: Emil Velikov > > > > VGEM and kms_swrast were introduced to work with one another. > > > > All we do is CPU rendering to dumb buffers. There is no reason to carve > > out GPU memory, increasing the memory pressure on a device that could > > make a better use of it. > > > > Note: > > - The original code did not work out of the box, since the dumb buffer > > ioctls are not exposed to render nodes. > > - This requires libdrm commit 3df8a7f0 ("xf86drm: fallback to MODALIAS > > for OF less platform devices") > > - The non-kms, swrast is unaffected by this change. > > > > v2: > > - elaborate what and how is/isn't working (Eric) > > - simplify driver_name handling (Eric) > > > > Signed-off-by: Emil Velikov > > --- > > src/egl/drivers/dri2/platform_surfaceless.c | 19 +-- > > 1 file changed, 13 insertions(+), 6 deletions(-) > > > > diff --git a/src/egl/drivers/dri2/platform_surfaceless.c > > b/src/egl/drivers/dri2/platform_surfaceless.c > > index ccdc370d059..0917c15e16d 100644 > > --- a/src/egl/drivers/dri2/platform_surfaceless.c > > +++ b/src/egl/drivers/dri2/platform_surfaceless.c > > @@ -286,10 +286,11 @@ surfaceless_probe_device(_EGLDisplay *disp, bool > > swrast) > > for (i = 0; i < num_devices; ++i) { > >device = devices[i]; > > > > - if (!(device->available_nodes & (1 << DRM_NODE_RENDER))) > > + const unsigned node_type = swrast ? DRM_NODE_PRIMARY : > > DRM_NODE_RENDER; > > Nittiest of nits: this could be outside the loop :) > Sure, done. 
> > + if (!(device->available_nodes & (1 << node_type))) > > continue; > > > > - dri2_dpy->fd = loader_open_device(device->nodes[DRM_NODE_RENDER]); > > + dri2_dpy->fd = loader_open_device(device->nodes[node_type]); > >if (dri2_dpy->fd < 0) > > continue; > > > > @@ -300,10 +301,16 @@ surfaceless_probe_device(_EGLDisplay *disp, bool > > swrast) > > continue; > >} > > > > - if (swrast) > > - dri2_dpy->driver_name = strdup("kms_swrast"); > > - else > > - dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd); > > + char *driver_name = loader_get_driver_for_fd(dri2_dpy->fd); > > + if (swrast) { > > + /* Use kms swrast only with vgem */ > > + if (strcmp(driver_name, "vgem") == 0) > > +dri2_dpy->driver_name = strdup("kms_swrast"); > > + free(driver_name); > > + } else { > > + /* Use the given hardware driver */ > > + dri2_dpy->driver_name = driver_name; > > + } > > That's easier to follow, thanks! > > Please wait for the chromium guys to weigh in, but: > Reviewed-by: Eric Engestrom > Thanks, pushed with Gurchetan's off-list rb. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109535] [Tracker] Mesa 19.0 release tracker
https://bugs.freedesktop.org/show_bug.cgi?id=109535 Timo Aaltonen changed: What|Removed |Added Depends on||109695 Referenced Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=109695 [Bug 109695] qemu using spice gl and sandbox resourcecontrol=deny crashes with SIGSYS on radeonsi -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] intel/compiler: implement more algebraic optimizations
On Wed, 2019-02-27 at 17:04 -0800, Ian Romanick wrote: > On 2/27/19 4:45 AM, Iago Toral Quiroga wrote: > > Now that we propagate constants to the first source of 2src > > instructions we > > see more opportunities of constant folding in the backend. > > > > Shader-db results on KBL: > > > > total instructions in shared programs: 14965607 -> 14855983 (- > > 0.73%) > > instructions in affected programs: 3988102 -> 3878478 (-2.75%) > > helped: 14292 > > HURT: 59 > > > > total cycles in shared programs: 344324295 -> 340656008 (-1.07%) > > cycles in affected programs: 247527740 -> 243859453 (-1.48%) > > helped: 14056 > > HURT: 3314 > > > > total loops in shared programs: 4283 -> 4283 (0.00%) > > loops in affected programs: 0 -> 0 > > helped: 0 > > HURT: 0 > > > > total spills in shared programs: 27812 -> 24350 (-12.45%) > > spills in affected programs: 24921 -> 21459 (-13.89%) > > helped: 345 > > HURT: 19 > > > > total fills in shared programs: 24173 -> 22032 (-8.86%) > > fills in affected programs: 21124 -> 18983 (-10.14%) > > helped: 355 > > HURT: 25 > > Ignore my previous questions about nir_opt_constant_folding after > nir_opt_algebraic_late. I had done that because I added a bunch of > things to nir_opt_algebraic_late that created my constant folding > opportunities. > > This is the combined changes for this patch and the previous > patch. For > this patch alone, I got: > > total instructions in shared programs: 15306213 -> 15221518 (-0.55%) > instructions in affected programs: 2911451 -> 2826756 (-2.91%) > helped: 13121 > HURT: 44 > helped stats (abs) min: 1 max: 51 x̄: 6.66 x̃: 6 > helped stats (rel) min: <.01% max: 16.67% x̄: 4.27% x̃: 3.30% > HURT stats (abs) min: 3 max: 453 x̄: 61.16 x̃: 5 > HURT stats (rel) min: 0.20% max: 151.00% x̄: 31.57% x̃: 19.23% > 95% mean confidence interval for instructions value: -6.61 -6.26 > 95% mean confidence interval for instructions %-change: -4.23% -4.07% > Instructions are helped. 
> > total cycles in shared programs: 375419164 -> 372829148 (-0.69%) > cycles in affected programs: 146769299 -> 144179283 (-1.76%) > helped: 10992 > HURT: 1833 > helped stats (abs) min: 1 max: 56127 x̄: 250.29 x̃: 18 > helped stats (rel) min: <.01% max: 40.52% x̄: 3.11% x̃: 2.58% > HURT stats (abs) min: 1 max: 1718 x̄: 87.93 x̃: 42 > HURT stats (rel) min: <.01% max: 139.33% x̄: 7.74% x̃: 3.08% > 95% mean confidence interval for cycles value: -248.21 -155.69 > 95% mean confidence interval for cycles %-change: -1.67% -1.44% > Cycles are helped. > > total spills in shared programs: 28828 -> 2 (0.21%) > spills in affected programs: 2037 -> 2097 (2.95%) > helped: 0 > HURT: 24 > > total fills in shared programs: 35542 -> 35639 (0.27%) > fills in affected programs: 3078 -> 3175 (3.15%) > helped: 2 > HURT: 26 > > I decided to look at some of the hurt shaders... it looks like some > of > the Unigine geometry shaders really took a beating (+150% > instructions). > Note the "max" in the "instructions in affected programs" above. I am seeing quite different results on my KBL laptop: total instructions in shared programs: 14945933 -> 14858158 (-0.59%) instructions in affected programs: 2842901 -> 2755126 (-3.09%) helped: 13196 HURT: 5 instructions HURT: shaders/closed/steam/deus-ex-mankind- divided/274.shader_test CS SIMD8: 1535 -> 1538 (0.20%) instructions HURT: shaders/closed/steam/deus-ex-mankind- divided/184.shader_test CS SIMD8: 1535 -> 1538 (0.20%) instructions HURT: shaders/dolphin/ubershaders/147.shader_test FS SIMD8: 3481 -> 3491 (0.29%) instructions HURT: shaders/dolphin/ubershaders/156.shader_test FS SIMD8: 3465 -> 3475 (0.29%) instructions HURT: shaders/dolphin/ubershaders/138.shader_test FS SIMD8: 3465 -> 3475 (0.29%) Did you test on a different gen? Can you paste here the paths of some of the GS shaders where you see the big regressions so I can verify I have them in my shader-db? Also, how did you test this patch exactly? 
When I was going to capture the reference shader-db results for patch 2 in this series so I could extract the results for patch 3 by comparing against it, I noticed that patch 2 would create constant folding scenarios (for example for ADD and MUL) that, before this patch, would hit an assertion in the driver since the algebraic pass only expects to find these opportunities for F types and will assert on that, so I guess you noticed this and fixed it before taking your numbers? > More comments below by SHL... > > > LOST: 0 > > GAINED: 5 > > --- > > src/intel/compiler/brw_fs.cpp | 203 > > -- > > 1 file changed, 195 insertions(+), 8 deletions(-) > > > > diff --git a/src/intel/compiler/brw_fs.cpp > > b/src/intel/compiler/brw_fs.cpp > > index 2358acbeb59..b2b60237c82 100644 > > --- a/src/intel/compiler/brw_fs.cpp > > +++ b/src/intel/compiler/brw_fs.cpp > > @@ -2583,9 +2583,55 @@ fs_visitor::opt_algebraic() > > break; > > > >case BRW_OPCODE_MUL: > > - if
Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering
Hi, On 28.2.2019 11.57, Marc-André Lureau wrote: On Thu, Feb 28, 2019 at 1:17 AM Marek Olšák wrote: I'd rather have something more robust than an env var, like catching SIGSYS. SIGSYS is info for the invoking parent, not the (Mesa) process doing the syscall. From "man 2 seccomp": The process terminates as though killed by a SIGSYS signal. Even if a signal handler has been registered for SIGSYS, the handler will be ignored in this case and the process always terminates. To a parent process that is waiting on this process (using waitpid(2) or similar), the returned wstatus will indicate that its child was terminated as though by a SIGSYS signal. With current qemu in most distros, it defaults to SIGSYS (we switched away from SCMP_ACT_KILL, which had other problems). With more recent qemu/libseccomp, it will default to SCMP_ACT_KILL_PROCESS. In those KILL action cases, mesa will not be able to catch the failing syscalls. Qemu / libvirt isn't the only thing using seccomp. For example Docker enables seccomp filters (along with capability restrictions) for the invoked containers unless that is explicitly disabled: https://docs.docker.com/engine/security/seccomp/ What actually gets filtered, is trivially changeable on Docker command line by giving a JSON file specifying the syscall filtering. Default policy seems to be white-listing affinity syscall: https://github.com/moby/moby/blob/master/profiles/seccomp/default.json Why distro versions of Qemu filter sched_setaffinity() syscall? - Eero Marek On Wed, Feb 27, 2019 at 6:13 PM wrote: From: Marc-André Lureau Since commit d877451b48a59ab0f9a4210fc736f51da5851c9a ("util/u_queue: add UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY"), mesa calls sched_setaffinity syscall. Unfortunately, qemu crashes with SIGSYS when sandboxing is enabled (by default with libvirt), as this syscall is filtered. 
There doesn't seem to be a way to check for the seccomp rule other than doing a call, which may result in various behaviour depending on seccomp actions. There is a PTRACE_SECCOMP_GET_FILTER, but it is low-level and a priviledged operation (but there might be a way to use it?). A safe way would be to try the call in a subprocess, unfortunately, qemu also prohibits fork(). Also this could be subject to TOCTOU. There seems to be few solutions, but the issue can be considered a regression for various libvirt/Boxes users. Introduce MESA_NO_THREAD_AFFINITY environment variable to prevent the offending call. Wrap pthread_setaffinity_np() in a utility function u_pthread_setaffinity_np(), returning a EACCESS error if the variable is set. Note: one call is left with a FIXME, as I didn't investigate how to build and test it, help welcome! See also: https://bugs.freedesktop.org/show_bug.cgi?id=109695 Signed-off-by: Marc-André Lureau --- .../drivers/swr/rasterizer/core/threads.cpp | 1 + src/util/u_queue.c| 2 +- src/util/u_thread.h | 15 ++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index e30c1170568..d10c79512a1 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -364,6 +364,7 @@ void bindThread(SWR_CONTEXT* pContext, CPU_ZERO(); CPU_SET(threadId, ); +/* FIXME: use u_pthread_setaffinity_np() if possible */ int err = pthread_setaffinity_np(thread, sizeof(cpu_set_t), ); if (err != 0) { diff --git a/src/util/u_queue.c b/src/util/u_queue.c index 3812c824b6d..dea8d2bb4ae 100644 --- a/src/util/u_queue.c +++ b/src/util/u_queue.c @@ -249,7 +249,7 @@ util_queue_thread_func(void *input) for (unsigned i = 0; i < CPU_SETSIZE; i++) CPU_SET(i, ); - pthread_setaffinity_np(pthread_self(), sizeof(cpuset), ); + u_pthread_setaffinity_np(pthread_self(), sizeof(cpuset), ); } #endif diff --git 
a/src/util/u_thread.h b/src/util/u_thread.h index a46c18d3db2..a4e6dbae5d7 100644 --- a/src/util/u_thread.h +++ b/src/util/u_thread.h @@ -70,6 +70,19 @@ static inline void u_thread_setname( const char *name ) (void)name; } +#if defined(HAVE_PTHREAD_SETAFFINITY) +static inline int u_pthread_setaffinity_np(pthread_t thread, size_t cpusetsize, + const cpu_set_t *cpuset) +{ + if (getenv("MESA_NO_THREAD_AFFINITY")) { + errno = EACCES; + return -1; + } + + return pthread_setaffinity_np(thread, cpusetsize, cpuset); +} +#endif + /** * An AMD Zen CPU consists of multiple modules where each module has its own L3 * cache. Inter-thread communication such as locks and atomics between modules @@ -89,7 +102,7 @@ util_pin_thread_to_L3(thrd_t thread, unsigned L3_index, unsigned cores_per_L3) CPU_ZERO(); for (unsigned i = 0; i <
Re: [Mesa-dev] [Mesa-stable] [PATCH 1/2] st/nine: Ignore window size if error
On Wed, 27 Feb 2019 at 22:49, Axel Davy wrote: > > Check GetWindowInfo and ignore the computed sizes > if there is an error. > > Fixes the regression caused by: > commit 2318ca68bbeb4fa6e21a4d8c650cec3f64246596 > "st/nine: Handle window resize when a presentation buffer is used" > when using old wine gallium nine patches > > Related issues: > https://github.com/iXit/Mesa-3D/issues/331 > https://github.com/iXit/Mesa-3D/issues/332 > > Fixes also crash at window destruction. > > Cc: mesa-sta...@lists.freedesktop.org > > Signed-off-by: Axel Davy > --- Nittiest of nits: the following takes 1/3 the cognitive effort. Check GetWindowInfo and ignore the computed sizes if there is an error. Fixes a regression caused by earlier commit when using old wine gallium nine patches. Should also address a crash at window destruction. Related issues: https://github.com/iXit/Mesa-3D/issues/331 https://github.com/iXit/Mesa-3D/issues/332 Cc: mesa-sta...@lists.freedesktop.org Fixes: 2318ca68bbe ("st/nine: Handle window resize when a presentation buffer is used") Signed-off-by: Axel Davy HTH -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] gallium: Implement APPLE_object_purgeable (iris, freedreno, vc4)
Quoting Emil Velikov (2019-02-28 11:44:28) > On Tue, 26 Feb 2019 at 21:52, Chris Wilson wrote: > > > > A few of the GEM drivers provide matching ioctls to allow control of > > their bo caches. Hook these up to APPLE_object_purgeable to allow > > clients to discard video memory under pressure where they are able to > > fallback to restoring content themselves, e.g. from their own (presumably > > compressed, on disk) caches. > > > > v2: Refactor the repeated resource purging. > > > > Cc: Eric Anholt > > Cc: Kenneth Graunke > > Cc: Rob Clark > > --- > > .../drivers/freedreno/freedreno_resource.c| 10 ++ > > .../drivers/freedreno/freedreno_screen.c | 1 + > > src/gallium/drivers/iris/iris_resource.c | 10 ++ > > src/gallium/drivers/iris/iris_screen.c| 1 + > > src/gallium/drivers/vc4/vc4_bufmgr.c | 15 ++ > > src/gallium/drivers/vc4/vc4_bufmgr.h | 3 + > > src/gallium/drivers/vc4/vc4_resource.c| 10 ++ > > src/gallium/drivers/vc4/vc4_screen.c | 3 + > > src/gallium/include/pipe/p_defines.h | 1 + > > src/gallium/include/pipe/p_screen.h | 20 +++ > > src/mesa/Makefile.sources | 2 + > > src/mesa/meson.build | 2 + > > src/mesa/state_tracker/st_cb_objectpurge.c| 141 ++ > > src/mesa/state_tracker/st_cb_objectpurge.h| 38 + > > src/mesa/state_tracker/st_context.c | 2 + > > src/mesa/state_tracker/st_extensions.c| 1 + > > 16 files changed, 260 insertions(+) > > create mode 100644 src/mesa/state_tracker/st_cb_objectpurge.c > > create mode 100644 src/mesa/state_tracker/st_cb_objectpurge.h > > > As-is this is, kind of, blocked on getting it right on all drivers. > Can I'd suggest splitting this in 4 patches: > - st/mesa - src/mesa + src/gallium/include + src/gallium/aux > - iris > - freedreno > - vc4 > > This way others can wire their drivers while the iris/freedreno/vc4 > review/testing is ongoing. Sure, even more so when the changes are non-trivial as they will have to pass the functional tests [to be written]. Hopefully, I can break iris locally.
-Chris ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] gallium: Implement APPLE_object_purgeable (iris, freedeno, vc4)
On Tue, 26 Feb 2019 at 21:52, Chris Wilson wrote: > > A few of the GEM drivers provide matching ioctls to allow control of > their bo caches. Hook these up to APPLE_object_purgeable to allow > clients to discard video memory under pressure where they are able to > fallback to restoring content themselves, e.g. from their own (presumably > compressed, on disk) caches. > > v2: Refactor the repeated resource purging. > > Cc: Eric Anholt > Cc: Kenneth Graunke > Cc: Rob Clark > --- > .../drivers/freedreno/freedreno_resource.c| 10 ++ > .../drivers/freedreno/freedreno_screen.c | 1 + > src/gallium/drivers/iris/iris_resource.c | 10 ++ > src/gallium/drivers/iris/iris_screen.c| 1 + > src/gallium/drivers/vc4/vc4_bufmgr.c | 15 ++ > src/gallium/drivers/vc4/vc4_bufmgr.h | 3 + > src/gallium/drivers/vc4/vc4_resource.c| 10 ++ > src/gallium/drivers/vc4/vc4_screen.c | 3 + > src/gallium/include/pipe/p_defines.h | 1 + > src/gallium/include/pipe/p_screen.h | 20 +++ > src/mesa/Makefile.sources | 2 + > src/mesa/meson.build | 2 + > src/mesa/state_tracker/st_cb_objectpurge.c| 141 ++ > src/mesa/state_tracker/st_cb_objectpurge.h| 38 + > src/mesa/state_tracker/st_context.c | 2 + > src/mesa/state_tracker/st_extensions.c| 1 + > 16 files changed, 260 insertions(+) > create mode 100644 src/mesa/state_tracker/st_cb_objectpurge.c > create mode 100644 src/mesa/state_tracker/st_cb_objectpurge.h > As-is this is, kind of, blocked on getting it right on all drivers. Can I'd suggest splitting this in 4 patches: - st/mesa - src/mesa + src/gallium/include + src/gallium/aux - iris - freedreno - vc4 This way others can wire their drivers while the iris/freedreno/vc4 review/testing is ongoing. HTH Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] How to get started to contribute (for EVOC)?
Hi Adarsh, On Thu, 28 Feb 2019 at 08:32, Adarsh Khubchandani wrote: > > Hello everyone. My name is Adarsh Khubchandani and I am an engineering > student from Mumbai, India. I have pretty decent knowledge of C, Java, > Python, HTML, CSS as well as technical documentation. My question is how to > get started as a contributor and apply for EVOC. The directions given on the > site are pretty hazzy and say that you need to establish contact with mentor > first before applying. > I am more interested in working on mesa/ OpenMax. Please guide me on the > further steps. > The EVoC page [1] has some tips: Next, find a potential mentor if you haven't already identified one. See the "Potential Mentors" list at the end of the ideas page[2]. From the list we can see: Julien Isorce - OpenMAX He's in the CC list, so he should be able to provide extra pointers. HTH Emil [1] https://www.x.org/wiki/XorgEVoC/ [2] https://www.x.org/wiki/SummerOfCodeIdeas/ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] scons: Workaround failures with MSVC when using SCons 3.0.[2-4].
This change applies the workaround suggested by Bill Deegan on the affected SCons versions. It also adds a comment with the URL explaining why we were customizing the decider and max_drift in the first place, as I had forgotten all about it. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109443 Tested-by: liviupro...@yahoo.com --- scons/gallium.py | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scons/gallium.py b/scons/gallium.py index 963834a5fbc..efe32e06c6c 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -308,7 +308,13 @@ def generate(env): if env.GetOption('num_jobs') <= 1: env.SetOption('num_jobs', num_jobs()) -env.Decider('MD5-timestamp') +# Speed up dependency checking. See +# - https://github.com/SCons/scons/wiki/GoFastButton +# - https://bugs.freedesktop.org/show_bug.cgi?id=109443 +scons_version = distutils.version.StrictVersion(SCons.__version__) +if scons_version < distutils.version.StrictVersion('3.0.2') or \ + scons_version > distutils.version.StrictVersion('3.0.4'): +env.Decider('MD5-timestamp') env.SetOption('max_drift', 60) # C preprocessor options -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering
Hi On Thu, Feb 28, 2019 at 1:17 AM Marek Olšák wrote: > > I'd rather have something more robust than an env var, like catching SIGSYS. With current qemu in most distros, it defaults to SIGSYS (we switched away from SCMP_ACT_KILL, which had other problems). With more recent qemu/libseccomp, it will default to SCMP_ACT_KILL_PROCESS. In those KILL action cases, mesa will not be able to catch the failing syscalls. > > Marek > > On Wed, Feb 27, 2019 at 6:13 PM wrote: >> >> From: Marc-André Lureau >> >> Since commit d877451b48a59ab0f9a4210fc736f51da5851c9a ("util/u_queue: >> add UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY"), mesa calls >> sched_setaffinity syscall. Unfortunately, qemu crashes with SIGSYS >> when sandboxing is enabled (by default with libvirt), as this syscall >> is filtered. >> >> There doesn't seem to be a way to check for the seccomp rule other >> than doing a call, which may result in various behaviour depending on >> seccomp actions. There is a PTRACE_SECCOMP_GET_FILTER, but it is >> low-level and a priviledged operation (but there might be a way to use >> it?). A safe way would be to try the call in a subprocess, >> unfortunately, qemu also prohibits fork(). Also this could be subject >> to TOCTOU. >> >> There seems to be few solutions, but the issue can be considered a >> regression for various libvirt/Boxes users. >> >> Introduce MESA_NO_THREAD_AFFINITY environment variable to prevent the >> offending call. Wrap pthread_setaffinity_np() in a utility function >> u_pthread_setaffinity_np(), returning a EACCESS error if the variable >> is set. >> >> Note: one call is left with a FIXME, as I didn't investigate how to >> build and test it, help welcome! 
>> >> See also: >> https://bugs.freedesktop.org/show_bug.cgi?id=109695 >> >> Signed-off-by: Marc-André Lureau >> --- >> .../drivers/swr/rasterizer/core/threads.cpp | 1 + >> src/util/u_queue.c| 2 +- >> src/util/u_thread.h | 15 ++- >> 3 files changed, 16 insertions(+), 2 deletions(-) >> >> diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp >> b/src/gallium/drivers/swr/rasterizer/core/threads.cpp >> index e30c1170568..d10c79512a1 100644 >> --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp >> +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp >> @@ -364,6 +364,7 @@ void bindThread(SWR_CONTEXT* pContext, >> CPU_ZERO(); >> CPU_SET(threadId, ); >> >> +/* FIXME: use u_pthread_setaffinity_np() if possible */ >> int err = pthread_setaffinity_np(thread, sizeof(cpu_set_t), ); >> if (err != 0) >> { >> diff --git a/src/util/u_queue.c b/src/util/u_queue.c >> index 3812c824b6d..dea8d2bb4ae 100644 >> --- a/src/util/u_queue.c >> +++ b/src/util/u_queue.c >> @@ -249,7 +249,7 @@ util_queue_thread_func(void *input) >>for (unsigned i = 0; i < CPU_SETSIZE; i++) >> CPU_SET(i, ); >> >> - pthread_setaffinity_np(pthread_self(), sizeof(cpuset), ); >> + u_pthread_setaffinity_np(pthread_self(), sizeof(cpuset), ); >> } >> #endif >> >> diff --git a/src/util/u_thread.h b/src/util/u_thread.h >> index a46c18d3db2..a4e6dbae5d7 100644 >> --- a/src/util/u_thread.h >> +++ b/src/util/u_thread.h >> @@ -70,6 +70,19 @@ static inline void u_thread_setname( const char *name ) >> (void)name; >> } >> >> +#if defined(HAVE_PTHREAD_SETAFFINITY) >> +static inline int u_pthread_setaffinity_np(pthread_t thread, size_t >> cpusetsize, >> + const cpu_set_t *cpuset) >> +{ >> + if (getenv("MESA_NO_THREAD_AFFINITY")) { >> + errno = EACCES; >> + return -1; >> + } >> + >> + return pthread_setaffinity_np(thread, cpusetsize, cpuset); >> +} >> +#endif >> + >> /** >> * An AMD Zen CPU consists of multiple modules where each module has its >> own L3 >> * cache. 
Inter-thread communication such as locks and atomics between >> modules >> @@ -89,7 +102,7 @@ util_pin_thread_to_L3(thrd_t thread, unsigned L3_index, >> unsigned cores_per_L3) >> CPU_ZERO(); >> for (unsigned i = 0; i < cores_per_L3; i++) >>CPU_SET(L3_index * cores_per_L3 + i, ); >> - pthread_setaffinity_np(thread, sizeof(cpuset), ); >> + u_pthread_setaffinity_np(thread, sizeof(cpuset), ); >> #endif >> } >> >> -- >> 2.21.0 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev -- Marc-André Lureau ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109443] Build failure with MSVC when using Scons >= 3.0.2
https://bugs.freedesktop.org/show_bug.cgi?id=109443 --- Comment #8 from Jose Fonseca --- (In reply to Alex Granni from comment #7) > I'd go for this way as 3.0.4 is also affected. OK. I'll assume I have your Tested-By and post it for review. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] radeonsi: always use compute rings for clover on CI and newer (v2)
On Tue, 2019-02-26 at 18:34 -0500, Marek Olšák wrote: > I ran a simple test verifying that compute is working properly on the > compute ring. I guess this was not on raven? With his patch I no longer see gfx timeout but the apps still hang. Anyway, that's a separate issue. > > When clover is using compute rings, it doesn't stall/block graphics > operations. It'd be nice to include this information in the commit message. Jan > > Marek > > On Tue, Feb 26, 2019 at 4:10 PM Jan Vesely wrote: > > > Can you add a bit of background why clover should/should not use other > > rings? > > > > I planned to test this, but my raven system can't run clover since kernel > > 4.20 release (BZ 109649), so I need to bisect that first. > > Can this patch help address the soft lockup issue on CIK (BZ 108879)? > > presumably, it was tested using clover on CIK, right? > > > > Jan > > > > On Tue, Feb 26, 2019 at 3:00 PM Marek Olšák wrote: > > > > > I'll just push it. > > > > > > Marek > > > > > > On Mon, Feb 25, 2019 at 9:37 PM Dieter Nützel > > > wrote: > > > > > > > Hello Marek, > > > > > > > > this series need a rebase (if you have some time). > > > > > > > > Dieter > > > > > > > > Am 12.02.2019 19:12, schrieb Marek Olšák: > > > > > From: Marek Olšák > > > > > > > > > > initialize all non-compute context functions to NULL. 
> > > > > > > > > > v2: fix SI > > > > > --- > > > > > src/gallium/drivers/radeonsi/si_blit.c| 14 ++- > > > > > src/gallium/drivers/radeonsi/si_clear.c | 7 +- > > > > > src/gallium/drivers/radeonsi/si_compute.c | 15 +-- > > > > > src/gallium/drivers/radeonsi/si_descriptors.c | 10 +- > > > > > src/gallium/drivers/radeonsi/si_gfx_cs.c | 29 +++--- > > > > > src/gallium/drivers/radeonsi/si_pipe.c| 95 > > > > > +++ > > > > > src/gallium/drivers/radeonsi/si_pipe.h| 3 +- > > > > > src/gallium/drivers/radeonsi/si_state.c | 3 +- > > > > > src/gallium/drivers/radeonsi/si_state.h | 1 + > > > > > src/gallium/drivers/radeonsi/si_state_draw.c | 25 +++-- > > > > > src/gallium/drivers/radeonsi/si_texture.c | 3 + > > > > > 11 files changed, 130 insertions(+), 75 deletions(-) > > > > > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_blit.c > > > > > b/src/gallium/drivers/radeonsi/si_blit.c > > > > > index bb8d1cbd12d..f39cb5d143f 100644 > > > > > --- a/src/gallium/drivers/radeonsi/si_blit.c > > > > > +++ b/src/gallium/drivers/radeonsi/si_blit.c > > > > > @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct > > > > > pipe_context *ctx, > > > > > > > > > > if (separate_dcc_dirty) { > > > > > tex->separate_dcc_dirty = false; > > > > > vi_separate_dcc_process_and_reset_stats(ctx, > > > > > > > > tex); > > > > > } > > > > > } > > > > > } > > > > > > > > > > void si_decompress_dcc(struct si_context *sctx, struct si_texture > > > > > *tex) > > > > > { > > > > > - if (!tex->dcc_offset) > > > > > + /* If graphics is disabled, we can't decompress DCC, but it > > > > > > > > shouldn't > > > > > + * be compressed either. The caller should simply discard it. 
> > > > > + */ > > > > > + if (!tex->dcc_offset || !sctx->has_graphics) > > > > > return; > > > > > > > > > > si_blit_decompress_color(sctx, tex, 0, > > > > > > > > tex->buffer.b.b.last_level, > > > > >0, util_max_layer(>buffer.b.b, 0), > > > > >true); > > > > > } > > > > > > > > > > void si_init_blit_functions(struct si_context *sctx) > > > > > { > > > > > sctx->b.resource_copy_region = si_resource_copy_region; > > > > > - sctx->b.blit = si_blit; > > > > > - sctx->b.flush_resource = si_flush_resource; > > > > > - sctx->b.generate_mipmap = si_generate_mipmap; > > > > > + > > > > > + if (sctx->has_graphics) { > > > > > + sctx->b.blit = si_blit; > > > > > + sctx->b.flush_resource = si_flush_resource; > > > > > + sctx->b.generate_mipmap = si_generate_mipmap; > > > > > + } > > > > > } > > > > > diff --git a/src/gallium/drivers/radeonsi/si_clear.c > > > > > b/src/gallium/drivers/radeonsi/si_clear.c > > > > > index 9a00bb73b94..e1805f2a1c9 100644 > > > > > --- a/src/gallium/drivers/radeonsi/si_clear.c > > > > > +++ b/src/gallium/drivers/radeonsi/si_clear.c > > > > > @@ -764,15 +764,18 @@ static void si_clear_texture(struct pipe_context > > > > > *pipe, > > > > > util_clear_render_target(pipe, sf, , > > > > >box->x, box->y, > > > > >box->width, > > > > > > > > box->height); > > > > > } > > > > > } > > > > > pipe_surface_reference(, NULL); > > > > > } > > > > > > > > > > void si_init_clear_functions(struct si_context *sctx) > > > > > { > > > > > - sctx->b.clear =
Re: [Mesa-dev] [PATCH] panfrost: List primitive restart enable bit
Am Mi., 27. Feb. 2019 um 17:12 Uhr schrieb Alyssa Rosenzweig : > > > super nitpicking: What is the purpose of this change? Should this > > define be added when it really gets used? > > I mean, we don't have explicit documentation on the cmdstream; if I > don't add the define / enum / struct / whatever in, it will be forgotten > to time. Ok, thanks. -- greets -- Christian Gmeiner, MSc https://christian-gmeiner.info ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 109741] swr doesn't build on i386 - simdlib.hpp:594:37: error: static assertion failed: This path only meant for 64-bit code
https://bugs.freedesktop.org/show_bug.cgi?id=109741 --- Comment #2 from Gert Wollny --- There is a patch for this: https://patchwork.freedesktop.org/patch/225706/ -- You are receiving this mail because: You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: Fix off-by-one in box intersection
Hello Gurchetan, On Wed, 27 Feb 2019 10:34:26 -0800 Gurchetan Singh wrote: > On Mon, Feb 25, 2019 at 12:35 AM Boris Brezillon > wrote: > > > > From: Daniel Stone > > > > pipe_boxes are x/y + width/height, rather than x0/y0 -> x1/y1. This > > means that (x+width) is not included in the box. > > > > The box intersection check was seemingly written for inclusive regions, > > and would falsely assert that adjacent boxes would overlap. > > > > Fix the off-by-one by being one pixel less greedy. > > Is there a reason for this change? I only see this used in a warning > in the nine state tracker and virgl (where reporting adjacent > intersections is preferred). This patch was part of a series Daniel worked on to optimize texture atlas updates on Vivante GPUs [1]. In the end, this work has been put on hold because the perf optimization was not as high as expected, but it might be resurrected at some point. Anyway, back to the point. In this patchset, the pipe_region_overlaps() helper needs to check when regions overlap and not when they're adjacent. If other users need u_box_test_intersection_2d() to also detect when boxes are adjacent, then we should definitely keep the code unchanged, but maybe it's worth a comment in the code to clarify the behavior. 
Regards, Boris [1]https://gitlab.collabora.com/bbrezillon/mesa/commits/etna-texture-atlas-18.2.4 > > > > > Signed-off-by: Daniel Stone > > Signed-off-by: Boris Brezillon > > --- > > src/gallium/auxiliary/util/u_box.h | 16 > > 1 file changed, 8 insertions(+), 8 deletions(-) > > > > diff --git a/src/gallium/auxiliary/util/u_box.h > > b/src/gallium/auxiliary/util/u_box.h > > index b3f478e7bfc4..ead7189ecaf8 100644 > > --- a/src/gallium/auxiliary/util/u_box.h > > +++ b/src/gallium/auxiliary/util/u_box.h > > @@ -161,15 +161,15 @@ u_box_test_intersection_2d(const struct pipe_box *a, > > unsigned i; > > int a_l[2], a_r[2], b_l[2], b_r[2]; > > > > - a_l[0] = MIN2(a->x, a->x + a->width); > > - a_r[0] = MAX2(a->x, a->x + a->width); > > - a_l[1] = MIN2(a->y, a->y + a->height); > > - a_r[1] = MAX2(a->y, a->y + a->height); > > + a_l[0] = MIN2(a->x, a->x + a->width - 1); > > + a_r[0] = MAX2(a->x, a->x + a->width - 1); > > + a_l[1] = MIN2(a->y, a->y + a->height - 1); > > + a_r[1] = MAX2(a->y, a->y + a->height - 1); > > > > - b_l[0] = MIN2(b->x, b->x + b->width); > > - b_r[0] = MAX2(b->x, b->x + b->width); > > - b_l[1] = MIN2(b->y, b->y + b->height); > > - b_r[1] = MAX2(b->y, b->y + b->height); > > + b_l[0] = MIN2(b->x, b->x + b->width - 1); > > + b_r[0] = MAX2(b->x, b->x + b->width - 1); > > + b_l[1] = MIN2(b->y, b->y + b->height - 1); > > + b_r[1] = MAX2(b->y, b->y + b->height - 1); > > > > for (i = 0; i < 2; ++i) { > >if (a_l[i] > b_r[i] || a_r[i] < b_l[i]) > > -- > > 2.20.1 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] How to get started to contribute (for EVOC)?
Hello everyone. My name is Adarsh Khubchandani and I am an engineering student from Mumbai, India. I have pretty decent knowledge of C, Java, Python, HTML, CSS as well as technical documentation. My question is how to get started as a contributor and apply for EVOC. The directions given on the site are pretty hazy and say that you need to establish contact with a mentor first before applying. I am more interested in working on mesa/OpenMAX. Please guide me on the further steps. Thanks. --- Adarsh Khubchandani. https://www.zoho.com/mail/ https://askhubchandani.github.io ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev