Re: [Mesa-dev] [PATCH] i965: Clean up brwNewProgram().
On 23/08/17 12:19, Kenneth Graunke wrote: All shader stages do the exact same thing, so we don't need the switch statement, or the redundant FS case. I believe these used to be different before Tim eliminated the (e.g.) brw_vertex_program subclasses. Seems about right, I must have forgotten to clean these up. Reviewed-by: Timothy Arceri--- src/mesa/drivers/dri/i965/brw_program.c | 33 + 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 94d8d8b978a..257a99bc946 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -138,38 +138,15 @@ static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target, GLuint id, bool is_arb_asm) { struct brw_context *brw = brw_context(ctx); + struct brw_program *prog = rzalloc(NULL, struct brw_program); - switch (target) { - case GL_VERTEX_PROGRAM_ARB: - case GL_TESS_CONTROL_PROGRAM_NV: - case GL_TESS_EVALUATION_PROGRAM_NV: - case GL_GEOMETRY_PROGRAM_NV: - case GL_COMPUTE_PROGRAM_NV: { - struct brw_program *prog = rzalloc(NULL, struct brw_program); - if (prog) { -prog->id = get_new_program_id(brw->screen); - - return _mesa_init_gl_program(>program, target, id, is_arb_asm); - } - else -return NULL; - } - - case GL_FRAGMENT_PROGRAM_ARB: { - struct brw_program *prog = rzalloc(NULL, struct brw_program); + if (prog) { + prog->id = get_new_program_id(brw->screen); - if (prog) { -prog->id = get_new_program_id(brw->screen); - - return _mesa_init_gl_program(>program, target, id, is_arb_asm); - } - else -return NULL; + return _mesa_init_gl_program(>program, target, id, is_arb_asm); } - default: - unreachable("Unsupported target in brwNewProgram()"); - } + return NULL; } static void brwDeleteProgram( struct gl_context *ctx, ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v4 6/6] radeonsi: try to re-use previously deleted bindless descriptor slots
This is causing piglit regressions for me. For example: ./bin/shader_runner tests/spec/arb_bindless_texture/execution/images/multiple-resident-images-reading.shader_test -auto -fb Unexpected GL error: GL_OUT_OF_MEMORY 0x505 (Error at /home/tarceri/git/Mesa_arrays_of_arrays_piglit/tests/shaders/shader_runner.c:3560) glMakeImageHandleResidentARB error PIGLIT: {"result": "fail" } On 22/08/17 07:22, Marek Olšák wrote: Reviewed-by: Marek OlšákSo let's commit this! Marek On Mon, Aug 21, 2017 at 4:50 PM, Samuel Pitoiset wrote: Currently, when the array is full it is resized but it can grow over and over because we don't try to re-use descriptor slots. v4: - rebase on top of idalloc changes v3: - use new idalloc gallium module Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/radeonsi/si_descriptors.c | 36 +-- src/gallium/drivers/radeonsi/si_pipe.h| 2 ++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f14fce103f..c869dac9bb 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -61,6 +61,7 @@ #include "gfx9d.h" #include "util/hash_table.h" +#include "util/u_idalloc.h" #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" @@ -2335,23 +2336,27 @@ static void si_init_bindless_descriptors(struct si_context *sctx, * considered to be a valid handle. */ sctx->num_bindless_descriptors = 1; + + /* Track which bindless slots are used (or not). */ + util_idalloc_init(>bindless_used_slots); + util_idalloc_resize(>bindless_used_slots, num_elements); + + /* Reserve slot 0 because it's an invalid handle for bindless. */ + assert(!util_idalloc_alloc(>bindless_used_slots)); } static void si_release_bindless_descriptors(struct si_context *sctx) { si_release_descriptors(>bindless_descriptors); + util_idalloc_fini(>bindless_used_slots); } -static unsigned -si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list, - unsigned size) +static unsigned si_get_first_free_bindless_slot(struct si_context *sctx) { struct si_descriptors *desc = >bindless_descriptors; - unsigned desc_slot, desc_slot_offset; - - /* Reserve a new slot for this bindless descriptor. */ - desc_slot = sctx->num_bindless_descriptors++; + unsigned desc_slot; + desc_slot = util_idalloc_alloc(>bindless_used_slots); if (desc_slot >= desc->num_elements) { /* The array of bindless descriptors is full, resize it. */ unsigned slot_size = desc->element_dw_size * 4; @@ -2363,6 +2368,20 @@ si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list, desc->num_active_slots = new_num_elements; } + assert(desc_slot); + return desc_slot; +} + +static unsigned +si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list, + unsigned size) +{ + struct si_descriptors *desc = >bindless_descriptors; + unsigned desc_slot, desc_slot_offset; + + /* Find a free slot. */ + desc_slot = si_get_first_free_bindless_slot(sctx); + /* For simplicity, sampler and image bindless descriptors use fixed * 16-dword slots for now. Image descriptors only need 8-dword but this * doesn't really matter because no real apps use image handles. @@ -2475,6 +2494,9 @@ static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle) tex_handle = (struct si_texture_handle *)entry->data; + /* Allow this descriptor slot to be re-used. */ + util_idalloc_free(>bindless_used_slots, tex_handle->desc_slot); + pipe_sampler_view_reference(_handle->view, NULL); _mesa_hash_table_remove(sctx->tex_handles, entry); FREE(tex_handle); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 56c3b08188..8a475d3b05 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -29,6 +29,7 @@ #include "si_shader.h" #include "util/u_dynarray.h" +#include "util/u_idalloc.h" #ifdef PIPE_ARCH_BIG_ENDIAN #define SI_BIG_ENDIAN 1 @@ -430,6 +431,7 @@ struct si_context { /* Bindless descriptors. */ struct si_descriptors bindless_descriptors; + struct util_idalloc bindless_used_slots; unsignednum_bindless_descriptors; boolbindless_descriptors_dirty; boolgraphics_bindless_pointer_dirty; -- 2.14.1 ___ mesa-dev mailing list
[Mesa-dev] [Bug 102017] Wrong colours in Cities Skyline
https://bugs.freedesktop.org/show_bug.cgi?id=102017 --- Comment #13 from MWATTT--- I definitively can't reproduce this bug, even with Ubuntu 17.04 stock mesa. It might be specific to some high end cards. Which game settings do you use? -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Clean up brwNewProgram().
All shader stages do the exact same thing, so we don't need the switch statement, or the redundant FS case. I believe these used to be different before Tim eliminated the (e.g.) brw_vertex_program subclasses. --- src/mesa/drivers/dri/i965/brw_program.c | 33 + 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 94d8d8b978a..257a99bc946 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -138,38 +138,15 @@ static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target, GLuint id, bool is_arb_asm) { struct brw_context *brw = brw_context(ctx); + struct brw_program *prog = rzalloc(NULL, struct brw_program); - switch (target) { - case GL_VERTEX_PROGRAM_ARB: - case GL_TESS_CONTROL_PROGRAM_NV: - case GL_TESS_EVALUATION_PROGRAM_NV: - case GL_GEOMETRY_PROGRAM_NV: - case GL_COMPUTE_PROGRAM_NV: { - struct brw_program *prog = rzalloc(NULL, struct brw_program); - if (prog) { -prog->id = get_new_program_id(brw->screen); - - return _mesa_init_gl_program(>program, target, id, is_arb_asm); - } - else -return NULL; - } - - case GL_FRAGMENT_PROGRAM_ARB: { - struct brw_program *prog = rzalloc(NULL, struct brw_program); + if (prog) { + prog->id = get_new_program_id(brw->screen); - if (prog) { -prog->id = get_new_program_id(brw->screen); - - return _mesa_init_gl_program(>program, target, id, is_arb_asm); - } - else -return NULL; + return _mesa_init_gl_program(>program, target, id, is_arb_asm); } - default: - unreachable("Unsupported target in brwNewProgram()"); - } + return NULL; } static void brwDeleteProgram( struct gl_context *ctx, -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Only set key->flat_shade if COL0/COL1 are written.
This may reduce some recompiles. --- src/mesa/drivers/dri/i965/brw_wm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index c9c45045902..e1555d60c56 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -531,7 +531,9 @@ brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) key->stats_wm = brw->stats_wm; /* _NEW_LIGHT */ - key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + key->flat_shade = + (prog->info.inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1)) && + (ctx->Light.ShadeModel == GL_FLAT); /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ key->clamp_fragment_color = ctx->Color._ClampFragmentColor; -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Record NOS dependencies for shader programs and only check those.
Previously, the state upload code listened to a broad set of dirty flags that corresponded to all possible state dependencies for a shader stage. This is somewhat overkill. For example, if a shader has no textures, there is no need to listen to _NEW_TEXTURE. Although these extra dependencies are harmless for correctness, they cause us to recompute the program keys and search the program cache more often than necessary. This patch introduces a new brw_program::nos field, containing the dirty flags which cover a shader's non-orthogonal state dependencies. We look at what the shader actually requires, and record that at link time (or fixed function program generation time). Then, the state upload code simply checks the current dirty flags against those. This should reduce CPU overhead when drawing with the same shaders multiple times, but changing state or resources (such as binding new textures or changing uniforms). Improves performance in GFXBench4's gl_driver2_off on Apollolake at 1280x720 by 3.06834% +/- 0.722141% (n=25). --- src/mesa/drivers/dri/i965/brw_context.h | 11 ++ src/mesa/drivers/dri/i965/brw_cs.c | 7 +++- src/mesa/drivers/dri/i965/brw_gs.c | 13 --- src/mesa/drivers/dri/i965/brw_link.cpp | 2 ++ src/mesa/drivers/dri/i965/brw_program.c | 32 ++ src/mesa/drivers/dri/i965/brw_program.h | 10 ++ src/mesa/drivers/dri/i965/brw_state.h | 7 src/mesa/drivers/dri/i965/brw_tcs.c | 16 ++--- src/mesa/drivers/dri/i965/brw_tes.c | 10 -- src/mesa/drivers/dri/i965/brw_vs.c | 37 +--- src/mesa/drivers/dri/i965/brw_wm.c | 60 ++--- 11 files changed, 159 insertions(+), 46 deletions(-) Applies on top of a bunch of my other patches. 'shader-nos' of my tree. diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a227a61f654..ab4463e0870 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -320,6 +320,17 @@ struct brw_state_flags { /** Subclass of Mesa program */ struct brw_program { struct gl_program program; + + /** +* Non-orthogonal state bits, indicating which dirty bits we need to +* listen to in order to fill out this shader's program key. +* +* This also includes BRW_NEW_*_PROGRAM, so that the state upload code +* can listen to the bits stored in this field, without having to OR an +* additional bit in to detect program changes. +*/ + struct brw_state_flags nos; + GLuint id; bool compiled_once; diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index cc564a012b6..6a6c1ba5bb0 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -171,6 +171,11 @@ brw_codegen_cs_prog(struct brw_context *brw, return true; } +void +brw_cs_record_nos(const struct brw_context *brw, struct brw_program *bprog) +{ + bprog->nos.brw = BRW_NEW_COMPUTE_PROGRAM; +} static void brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key) @@ -200,7 +205,7 @@ brw_upload_cs_prog(struct brw_context *brw) if (!cp) return; - if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM)) + if (!brw_shader_state_dirty(brw, cp)) return; brw->cs.base.sampler_count = diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 179ccc4c6fb..5bb3fd72e9b 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -171,13 +171,12 @@ brw_codegen_gs_prog(struct brw_context *brw, return true; } -static bool -brw_gs_state_dirty(const struct brw_context *brw) +void +brw_gs_record_nos(const struct brw_context *brw, struct brw_program *bprog) { - return brw_state_dirty(brw, - _NEW_TEXTURE, - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_TRANSFORM_FEEDBACK); + bprog->nos.brw = BRW_NEW_GEOMETRY_PROGRAM; + + bprog->nos.brw |= BRW_NEW_TRANSFORM_FEEDBACK; } void @@ -203,7 +202,7 @@ brw_upload_gs_prog(struct brw_context *brw) /* BRW_NEW_GEOMETRY_PROGRAM */ struct brw_program *gp = (struct brw_program *) brw->geometry_program; - if (!brw_gs_state_dirty(brw)) + if (!brw_shader_state_dirty(brw, gp)) return; brw_gs_populate_key(brw, ); diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index e9158c596c5..ec1f66906a0 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -249,6 +249,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) compiler->scalar_stage[stage]); infos[stage] = >nir->info; + brw_record_nos(brw, brw_program(prog)); + /* Make a pass over the IR to add state references for any built-in * uniforms that are
Re: [Mesa-dev] [PATCH 1/2] gallium/docs: improve docs for SAMPLE_POS, SAMPLE_INFO, TXQS, MSAA semantics
Am 23.08.2017 um 01:59 schrieb Marek Olšák: > On Wed, Aug 23, 2017 at 12:30 AM, Roland Scheidegger> wrote: >> Am 22.08.2017 um 17:15 schrieb Marek Olšák: >>> On Sun, Aug 20, 2017 at 12:32 AM, Roland Scheidegger >>> wrote: Am 19.08.2017 um 21:32 schrieb Marek Olšák: > How about we remove all opcodes that are unused? Like: > > SAMPLE_POS > SAMPLE_INFO > SAMPLE > SAMPLE_I > SAMPLE_I_MS > SAMPLE_B > SAMPLE_C > SAMPLE_C_LZ > SAMPLE_D > SAMPLE_L > GATHER4 > SVIEWINFO These are all d3d10 opcodes, and we need them (llvmpipe supports all of them with the exception of sample_pos and sample_info, right now). (It's >>> >>> SAMPLE_INFO is almost the same as TXQS and given the current state of >>> driver support, it would be better to remove SAMPLE_INFO and keep >>> TXQS. >>> >>> SAMPLE_INFO returns (samples, 0, 0, 0), while TXQS returns (samples, >>> undef, undef, undef). >>> >>> There is also RESQ, which returns (w, h, d|layers, samples). >>> >> >> They take different register types, however. > > Most instructions support multiple register types. MOV supports TEMP, > CONST, IN, OUT. LOAD supports IMAGE, BUFFER, and in the future maybe > also CONSTBUF and SAMP. > That's true, but there aren't really any opcodes which could take either sampler view reg file or sampler. Albeit I suppose it would be doable. Though it looks to me like you could easily ditch TXQS in favor of RESQ too then... Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] gallium/docs: improve docs for SAMPLE_POS, SAMPLE_INFO, TXQS, MSAA semantics
On Wed, Aug 23, 2017 at 12:30 AM, Roland Scheideggerwrote: > Am 22.08.2017 um 17:15 schrieb Marek Olšák: >> On Sun, Aug 20, 2017 at 12:32 AM, Roland Scheidegger >> wrote: >>> Am 19.08.2017 um 21:32 schrieb Marek Olšák: How about we remove all opcodes that are unused? Like: SAMPLE_POS SAMPLE_INFO SAMPLE SAMPLE_I SAMPLE_I_MS SAMPLE_B SAMPLE_C SAMPLE_C_LZ SAMPLE_D SAMPLE_L GATHER4 SVIEWINFO >>> These are all d3d10 opcodes, and we need them (llvmpipe supports all of >>> them with the exception of sample_pos and sample_info, right now). (It's >> >> SAMPLE_INFO is almost the same as TXQS and given the current state of >> driver support, it would be better to remove SAMPLE_INFO and keep >> TXQS. >> >> SAMPLE_INFO returns (samples, 0, 0, 0), while TXQS returns (samples, >> undef, undef, undef). >> >> There is also RESQ, which returns (w, h, d|layers, samples). >> > > They take different register types, however. Most instructions support multiple register types. MOV supports TEMP, CONST, IN, OUT. LOAD supports IMAGE, BUFFER, and in the future maybe also CONSTBUF and SAMP. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
On 23/08/17 09:42, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 7:41 PM, Timothy Arceriwrote: On 23/08/17 09:28, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 7:25 PM, Timothy Arceri wrote: On 23/08/17 09:08, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 6:55 PM, Timothy Arceri wrote: On 23/08/17 00:56, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger wrote: I am probably missing something here, but why do you need a new register file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing? Or do you need to know how it's going to be accessed in advance? With bindless, LOAD can take a CONST I believe [which contains the value of the bindless id]. I think it's nice to keep those concepts separate... having CONST sometimes mean the value and other times mean the address is a bit weird. This way CONSTBUF[0] is the address of the 0th constbuf. Yeah. I think we also may need another type for bindless as I'm planning to use TGSI_FILE_CONSTANT for regular uniforms. The plan is to use LOAD for supporting packed uniforms rather than padding everything to vec4. Shouldn't be necessary... we can think of CONST (and TEMP and IMM) as "value" registers, and MEMORY/IMAGE/BUFFER/CONSTBUF as "address" registers. If LOAD receives a value, then it's a bindless image handle, otherwise it should work based on which of the address registers it receives. But how do you tell the difference between a bindless image handle and a non-indirect uniform where the "value" is just the index of the uniform? Easy - if the first arg is a CONSTBUF[], it's a uniform load. If it's a value, then it's a bindless image handle. A uniform load becomes LOAD dst, CONSTBUF[1], IMM[0].x which would be identical to doing MOV dst, CONST[1][5] (if IMM[0].x == 5) I'm talking about using: CONSTBUF for UBOs CONSTANT for uniforms SOMETHINGELSE for bindless images As far as I can tell we need to differentiate between uniforms and ubos, and there doesn't seem to be anything else to help with that. Gallium doesn't differentiate between uniform and UBO. In practice, st/mesa sticks uniforms in the zero const slot. Yeah ok, looking at the code again that makes sense. Thanks for clarifying. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
On Tue, Aug 22, 2017 at 7:41 PM, Timothy Arceriwrote: > > > On 23/08/17 09:28, Ilia Mirkin wrote: >> >> On Tue, Aug 22, 2017 at 7:25 PM, Timothy Arceri >> wrote: >>> >>> >>> >>> On 23/08/17 09:08, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 6:55 PM, Timothy Arceri wrote: > > > > > On 23/08/17 00:56, Ilia Mirkin wrote: >> >> >> >> On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger >> >> wrote: >>> >>> >>> >>> I am probably missing something here, but why do you need a new >>> register >>> file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, >>> can't >>> you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same >>> thing? >>> Or do you need to know how it's going to be accessed in advance? >> >> >> >> >> With bindless, LOAD can take a CONST I believe [which contains the >> value of the bindless id]. I think it's nice to keep those concepts >> separate... having CONST sometimes mean the value and other times mean >> the address is a bit weird. This way CONSTBUF[0] is the address of the >> 0th constbuf. > > > > > Yeah. I think we also may need another type for bindless as I'm > planning > to > use TGSI_FILE_CONSTANT for regular uniforms. The plan is to use LOAD > for > supporting packed uniforms rather than padding everything to vec4. Shouldn't be necessary... we can think of CONST (and TEMP and IMM) as "value" registers, and MEMORY/IMAGE/BUFFER/CONSTBUF as "address" registers. If LOAD receives a value, then it's a bindless image handle, otherwise it should work based on which of the address registers it receives. >>> >>> >>> >>> But how do you tell the difference between a bindless image handle and a >>> non-indirect uniform where the "value" is just the index of the uniform? >> >> >> Easy - if the first arg is a CONSTBUF[], it's a uniform load. If it's >> a value, then it's a bindless image handle. A uniform load becomes >> >> LOAD dst, CONSTBUF[1], IMM[0].x >> >> which would be identical to doing >> >> MOV dst, CONST[1][5] (if IMM[0].x == 5) >> > > I'm talking about using: > > CONSTBUF for UBOs > > CONSTANT for uniforms > > SOMETHINGELSE for bindless images > > As far as I can tell we need to differentiate between uniforms and ubos, and > there doesn't seem to be anything else to help with that. Gallium doesn't differentiate between uniform and UBO. In practice, st/mesa sticks uniforms in the zero const slot. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
On 23/08/17 09:28, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 7:25 PM, Timothy Arceriwrote: On 23/08/17 09:08, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 6:55 PM, Timothy Arceri wrote: On 23/08/17 00:56, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger wrote: I am probably missing something here, but why do you need a new register file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing? Or do you need to know how it's going to be accessed in advance? With bindless, LOAD can take a CONST I believe [which contains the value of the bindless id]. I think it's nice to keep those concepts separate... having CONST sometimes mean the value and other times mean the address is a bit weird. This way CONSTBUF[0] is the address of the 0th constbuf. Yeah. I think we also may need another type for bindless as I'm planning to use TGSI_FILE_CONSTANT for regular uniforms. The plan is to use LOAD for supporting packed uniforms rather than padding everything to vec4. Shouldn't be necessary... we can think of CONST (and TEMP and IMM) as "value" registers, and MEMORY/IMAGE/BUFFER/CONSTBUF as "address" registers. If LOAD receives a value, then it's a bindless image handle, otherwise it should work based on which of the address registers it receives. But how do you tell the difference between a bindless image handle and a non-indirect uniform where the "value" is just the index of the uniform? Easy - if the first arg is a CONSTBUF[], it's a uniform load. If it's a value, then it's a bindless image handle. A uniform load becomes LOAD dst, CONSTBUF[1], IMM[0].x which would be identical to doing MOV dst, CONST[1][5] (if IMM[0].x == 5) I'm talking about using: CONSTBUF for UBOs CONSTANT for uniforms SOMETHINGELSE for bindless images As far as I can tell we need to differentiate between uniforms and ubos, and there doesn't seem to be anything else to help with that. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
On Tue, Aug 22, 2017 at 7:25 PM, Timothy Arceriwrote: > > > On 23/08/17 09:08, Ilia Mirkin wrote: >> >> On Tue, Aug 22, 2017 at 6:55 PM, Timothy Arceri >> wrote: >>> >>> >>> >>> On 23/08/17 00:56, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger wrote: > > > I am probably missing something here, but why do you need a new > register > file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't > you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing? > Or do you need to know how it's going to be accessed in advance? With bindless, LOAD can take a CONST I believe [which contains the value of the bindless id]. I think it's nice to keep those concepts separate... having CONST sometimes mean the value and other times mean the address is a bit weird. This way CONSTBUF[0] is the address of the 0th constbuf. >>> >>> >>> >>> Yeah. I think we also may need another type for bindless as I'm planning >>> to >>> use TGSI_FILE_CONSTANT for regular uniforms. The plan is to use LOAD for >>> supporting packed uniforms rather than padding everything to vec4. >> >> >> Shouldn't be necessary... we can think of CONST (and TEMP and IMM) as >> "value" registers, and MEMORY/IMAGE/BUFFER/CONSTBUF as "address" >> registers. If LOAD receives a value, then it's a bindless image >> handle, otherwise it should work based on which of the address >> registers it receives. > > > But how do you tell the difference between a bindless image handle and a > non-indirect uniform where the "value" is just the index of the uniform? Easy - if the first arg is a CONSTBUF[], it's a uniform load. If it's a value, then it's a bindless image handle. A uniform load becomes LOAD dst, CONSTBUF[1], IMM[0].x which would be identical to doing MOV dst, CONST[1][5] (if IMM[0].x == 5) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
On 23/08/17 09:08, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 6:55 PM, Timothy Arceriwrote: On 23/08/17 00:56, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger wrote: I am probably missing something here, but why do you need a new register file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing? Or do you need to know how it's going to be accessed in advance? With bindless, LOAD can take a CONST I believe [which contains the value of the bindless id]. I think it's nice to keep those concepts separate... having CONST sometimes mean the value and other times mean the address is a bit weird. This way CONSTBUF[0] is the address of the 0th constbuf. Yeah. I think we also may need another type for bindless as I'm planning to use TGSI_FILE_CONSTANT for regular uniforms. The plan is to use LOAD for supporting packed uniforms rather than padding everything to vec4. Shouldn't be necessary... we can think of CONST (and TEMP and IMM) as "value" registers, and MEMORY/IMAGE/BUFFER/CONSTBUF as "address" registers. If LOAD receives a value, then it's a bindless image handle, otherwise it should work based on which of the address registers it receives. But how do you tell the difference between a bindless image handle and a non-indirect uniform where the "value" is just the index of the uniform? -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/8] glsl: stop adding pointers from shader_info to the cache
On 22/08/17 03:19, Samuel Pitoiset wrote: Looks like you no longer encode shader_info::stage (which is the first field), is that expected? Nice catch! I guess we probably don't use this field after compilation, although we should still fix it so I've sent a 6.5 patch to reorder the members. Thanks. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6.5/8] compiler: move pointers to the start of shader_info
This will allow us to easily skip them when writting the struct to disk cache. --- src/compiler/shader_info.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h index a67084156d..38413940d6 100644 --- a/src/compiler/shader_info.h +++ b/src/compiler/shader_info.h @@ -25,28 +25,28 @@ #ifndef SHADER_INFO_H #define SHADER_INFO_H #include "shader_enums.h" #ifdef __cplusplus extern "C" { #endif typedef struct shader_info { - /** The shader stage, such as MESA_SHADER_VERTEX. */ - gl_shader_stage stage; - const char *name; /* Descriptive name provided by the client; may be NULL */ const char *label; + /** The shader stage, such as MESA_SHADER_VERTEX. */ + gl_shader_stage stage; + /* Number of textures used by this shader */ unsigned num_textures; /* Number of uniform buffers used by this shader */ unsigned num_ubos; /* Number of atomic buffers used by this shader */ unsigned num_abos; /* Number of shader storage buffers used by this shader */ unsigned num_ssbos; /* Number of images used by this shader */ unsigned num_images; -- 2.13.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] i965: Drop useless gen6_brw_upload_ff_gs_prog() wrapper.
gen6...brw? Drop some baklava layers. --- src/mesa/drivers/dri/i965/brw_ff_gs.c | 5 - src/mesa/drivers/dri/i965/brw_ff_gs.h | 1 - src/mesa/drivers/dri/i965/brw_gs.c| 2 +- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c index a3919524df1..27c99141927 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs.c +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c @@ -256,8 +256,3 @@ brw_upload_ff_gs_prog(struct brw_context *brw) } } } - -void gen6_brw_upload_ff_gs_prog(struct brw_context *brw) -{ - brw_upload_ff_gs_prog(brw); -} diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.h b/src/mesa/drivers/dri/i965/brw_ff_gs.h index bac2730b7e4..b32b20d89bd 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs.h +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.h @@ -110,7 +110,6 @@ void brw_ff_gs_lines(struct brw_ff_gs_compile *c); void gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key, unsigned num_verts, bool check_edge_flag); -void gen6_brw_upload_ff_gs_prog(struct brw_context *brw); void brw_upload_ff_gs_prog(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index bd8f993d11b..9ba94c45c8f 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -210,7 +210,7 @@ brw_upload_gs_prog(struct brw_context *brw) /* No geometry shader. Vertex data just passes straight through. */ if (brw->gen == 6 && (brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) { - gen6_brw_upload_ff_gs_prog(brw); + brw_upload_ff_gs_prog(brw); return; } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] i965: Fix state flagging of Gen6 SOL programs.
It doesn't seem like the old code could possibly work. 1. brw_gs_state_dirty made us bail unless one of these flags were set: _NEW_TEXTURE, BRW_NEW_GEOMETRY_PROGRAM, BRW_NEW_TRANSFORM_FEEDBACK 2. If there was no geometry program, we called brw_upload_ff_gs_prog()3 3. That checked brw_ff_gs_state_dirty and bailed unless these were set: _NEW_LIGHT, BRW_NEW_PRIMITIVE, BRW_NEW_TRANSFORM_FEEDBACK, BRW_NEW_VS_PROG_DATA. 4. brw_ff_gs_prog_key pv_first and attr fields were set based on data depending on _NEW_LIGHT and BRW_NEW_VS_PROG_DATA. This means that if we needed a FF GS program, and changed the VS outputs or provoking vertex mode, we'd fail to notice that we needed to emit a new program. --- src/mesa/drivers/dri/i965/brw_gs.c | 16 src/mesa/drivers/dri/i965/brw_state_upload.c | 9 ++--- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 9ba94c45c8f..179ccc4c6fb 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -206,22 +206,6 @@ brw_upload_gs_prog(struct brw_context *brw) if (!brw_gs_state_dirty(brw)) return; - if (gp == NULL) { - /* No geometry shader. Vertex data just passes straight through. */ - if (brw->gen == 6 && - (brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) { - brw_upload_ff_gs_prog(brw); - return; - } - - /* Other state atoms had better not try to access prog_data, since - * there's no GS program. - */ - brw->gs.base.prog_data = NULL; - - return; - } - brw_gs_populate_key(brw, ); if (!brw_search_cache(>cache, BRW_CACHE_GS_PROG, diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 1ae45ba2ac1..1608af4f3bd 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -395,10 +395,13 @@ brw_upload_programs(struct brw_context *brw, brw_upload_vs_prog(brw); brw_upload_tess_programs(brw); - if (brw->gen < 6) - brw_upload_ff_gs_prog(brw); - else + if (brw->geometry_program) { brw_upload_gs_prog(brw); + } else { + brw->gs.base.prog_data = NULL; + if (brw->gen < 7) +brw_upload_ff_gs_prog(brw); + } /* Update the VUE map for data exiting the GS stage of the pipeline. * This comes from the last enabled shader stage. -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
On Tue, Aug 22, 2017 at 6:55 PM, Timothy Arceriwrote: > > > On 23/08/17 00:56, Ilia Mirkin wrote: >> >> On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger >> wrote: >>> >>> I am probably missing something here, but why do you need a new register >>> file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't >>> you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing? >>> Or do you need to know how it's going to be accessed in advance? >> >> >> With bindless, LOAD can take a CONST I believe [which contains the >> value of the bindless id]. I think it's nice to keep those concepts >> separate... having CONST sometimes mean the value and other times mean >> the address is a bit weird. This way CONSTBUF[0] is the address of the >> 0th constbuf. > > > Yeah. I think we also may need another type for bindless as I'm planning to > use TGSI_FILE_CONSTANT for regular uniforms. The plan is to use LOAD for > supporting packed uniforms rather than padding everything to vec4. Shouldn't be necessary... we can think of CONST (and TEMP and IMM) as "value" registers, and MEMORY/IMAGE/BUFFER/CONSTBUF as "address" registers. If LOAD receives a value, then it's a bindless image handle, otherwise it should work based on which of the address registers it receives. -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
On 23/08/17 00:56, Ilia Mirkin wrote: On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheideggerwrote: I am probably missing something here, but why do you need a new register file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing? Or do you need to know how it's going to be accessed in advance? With bindless, LOAD can take a CONST I believe [which contains the value of the bindless id]. I think it's nice to keep those concepts separate... having CONST sometimes mean the value and other times mean the address is a bit weird. This way CONSTBUF[0] is the address of the 0th constbuf. Yeah. I think we also may need another type for bindless as I'm planning to use TGSI_FILE_CONSTANT for regular uniforms. The plan is to use LOAD for supporting packed uniforms rather than padding everything to vec4. -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Drop Gen7+ nonsense from brw_ff_gs.c.
Yeah I've noticed this before. Reviewed-by: Timothy ArceriOn 23/08/17 07:58, Kenneth Graunke wrote: brw_ff_gs.c is about using the geometry shader to implement things that the fixed function ought to do, but doesn't on old hardware. Gen7+ does not need this. We should drop the misleading comment about Gen7 not using geometry shaders. --- src/mesa/drivers/dri/i965/brw_ff_gs.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c index b7b4b716011..a3919524df1 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs.c +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c @@ -170,6 +170,8 @@ brw_ff_gs_populate_key(struct brw_context *brw, struct gl_context *ctx = >ctx; + assert(brw->gen < 7); + memset(key, 0, sizeof(*key)); /* BRW_NEW_VS_PROG_DATA (part of VUE map) */ @@ -187,10 +189,7 @@ brw_ff_gs_populate_key(struct brw_context *brw, key->pv_first = true; } - if (brw->gen >= 7) { - /* On Gen7 and later, we don't use GS (yet). */ - key->need_gs_prog = false; - } else if (brw->gen == 6) { + if (brw->gen == 6) { /* On Gen6, GS is used for transform feedback. */ /* BRW_NEW_TRANSFORM_FEEDBACK */ if (_mesa_is_xfb_active_and_unpaused(ctx)) { ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102017] Wrong colours in Cities Skyline
https://bugs.freedesktop.org/show_bug.cgi?id=102017 --- Comment #12 from Timothy Arceri--- (In reply to Thomas Jollans from comment #9) > I'm having the same issue, with Ubuntu 17.04 stock mesa and r600 (HD 6870) > > It looks to me like an issue with textures not being drawn; Everything is > displayed with nice reflective surfaces. In the in-game data views (where > most surfaces are blank), the colours are largely fine, but there are some > glitches in details (e.g. water, road bridges). > > I've just upgraded from Ubuntu 16.04 LTS. Before the upgrade, everything > worked beautifully with the same game version. If you can figure out which version of Mesa is used by 16.04 LTS and 17.04, I would suggest doing a git bisect to find the change that broke things. Bisecting the commit will greatly increase the chance of this being fixed. Feel free to ask for help on the #dri-devel freenode channel if you need assistance. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] gallium/docs: improve docs for SAMPLE_POS, SAMPLE_INFO, TXQS, MSAA semantics
Am 22.08.2017 um 17:15 schrieb Marek Olšák: > On Sun, Aug 20, 2017 at 12:32 AM, Roland Scheidegger> wrote: >> Am 19.08.2017 um 21:32 schrieb Marek Olšák: >>> How about we remove all opcodes that are unused? Like: >>> >>> SAMPLE_POS >>> SAMPLE_INFO >>> SAMPLE >>> SAMPLE_I >>> SAMPLE_I_MS >>> SAMPLE_B >>> SAMPLE_C >>> SAMPLE_C_LZ >>> SAMPLE_D >>> SAMPLE_L >>> GATHER4 >>> SVIEWINFO >> These are all d3d10 opcodes, and we need them (llvmpipe supports all of >> them with the exception of sample_pos and sample_info, right now). (It's > > SAMPLE_INFO is almost the same as TXQS and given the current state of > driver support, it would be better to remove SAMPLE_INFO and keep > TXQS. > > SAMPLE_INFO returns (samples, 0, 0, 0), while TXQS returns (samples, > undef, undef, undef). > > There is also RESQ, which returns (w, h, d|layers, samples). > They take different register types, however. Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Drop Gen7+ nonsense from brw_ff_gs.c.
brw_ff_gs.c is about using the geometry shader to implement things that the fixed function ought to do, but doesn't on old hardware. Gen7+ does not need this. We should drop the misleading comment about Gen7 not using geometry shaders. --- src/mesa/drivers/dri/i965/brw_ff_gs.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c index b7b4b716011..a3919524df1 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs.c +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c @@ -170,6 +170,8 @@ brw_ff_gs_populate_key(struct brw_context *brw, struct gl_context *ctx = >ctx; + assert(brw->gen < 7); + memset(key, 0, sizeof(*key)); /* BRW_NEW_VS_PROG_DATA (part of VUE map) */ @@ -187,10 +189,7 @@ brw_ff_gs_populate_key(struct brw_context *brw, key->pv_first = true; } - if (brw->gen >= 7) { - /* On Gen7 and later, we don't use GS (yet). */ - key->need_gs_prog = false; - } else if (brw->gen == 6) { + if (brw->gen == 6) { /* On Gen6, GS is used for transform feedback. */ /* BRW_NEW_TRANSFORM_FEEDBACK */ if (_mesa_is_xfb_active_and_unpaused(ctx)) { -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102038] assertion failure in update_framebuffer_size
https://bugs.freedesktop.org/show_bug.cgi?id=102038 Brian Paulchanged: What|Removed |Added Attachment #133654|0 |1 is obsolete|| --- Comment #12 from Brian Paul --- Created attachment 133687 --> https://bugs.freedesktop.org/attachment.cgi?id=133687=edit new patch to try. Hi Bruce, here's another patch to try. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/7] i965: Add a brw_wm_prog_data::has_render_target_reads field.
State upload code should use prog_data rather than poking at shader_info directly. --- src/intel/compiler/brw_compiler.h| 1 + src/intel/compiler/brw_fs.cpp| 2 ++ src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 6 ++ 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 66d6a6f5ee8..6753a8daf08 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -614,6 +614,7 @@ struct brw_wm_prog_data { bool uses_src_depth; bool uses_src_w; bool uses_sample_mask; + bool has_render_target_reads; bool has_side_effects; bool pulls_bary; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index b48dc4167e7..f2596e38861 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6544,6 +6544,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, shader->info.fs.uses_sample_qualifier || shader->info.outputs_read); + prog_data->has_render_target_reads = shader->info.outputs_read != 0ull; + prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests; prog_data->post_depth_coverage = shader->info.fs.post_depth_coverage; prog_data->inner_coverage = shader->info.fs.inner_coverage; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index dc1a770a5d3..5cfdbe58102 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1049,9 +1049,8 @@ update_renderbuffer_read_surfaces(struct brw_context *brw) const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(brw->wm.base.prog_data); - /* BRW_NEW_FRAGMENT_PROGRAM */ - if (!ctx->Extensions.MESA_shader_framebuffer_fetch && - brw->fragment_program && brw->fragment_program->info.outputs_read) { + if (wm_prog_data->has_render_target_reads && + !ctx->Extensions.MESA_shader_framebuffer_fetch) { /* _NEW_BUFFERS */ const struct gl_framebuffer *fb = ctx->DrawBuffer; @@ -1117,7 +1116,6 @@ const struct brw_tracked_state brw_renderbuffer_read_surfaces = { .mesa = _NEW_BUFFERS, .brw = BRW_NEW_BATCH | BRW_NEW_FAST_CLEAR_COLOR | - BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA, }, .emit = update_renderbuffer_read_surfaces, -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/7] i965: Devirtualize update_renderbuffer_surface.
Replace piles of my own boilerplate with 1-2 lines of code. --- src/mesa/drivers/dri/i965/brw_context.c | 4 src/mesa/drivers/dri/i965/brw_context.h | 5 - src/mesa/drivers/dri/i965/brw_state.h| 4 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 22 +- 4 files changed, 5 insertions(+), 30 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 3380582b3fa..2dbcc450860 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -882,16 +882,12 @@ brwCreateContext(gl_api api, brw->gs.base.stage = MESA_SHADER_GEOMETRY; brw->wm.base.stage = MESA_SHADER_FRAGMENT; if (brw->gen >= 8) { - gen6_init_vtable_surface_functions(brw); brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz; } else if (brw->gen >= 7) { - gen6_init_vtable_surface_functions(brw); brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz; } else if (brw->gen >= 6) { - gen6_init_vtable_surface_functions(brw); brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz; } else { - gen4_init_vtable_surface_functions(brw); brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz; } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 94c0a1b9636..2274fe5c80e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -633,11 +633,6 @@ struct brw_context struct { - uint32_t (*update_renderbuffer_surface)(struct brw_context *brw, - struct gl_renderbuffer *rb, - unsigned unit, - uint32_t surf_index); - /** * Send the appropriate state packets to configure depth, stencil, and * HiZ buffers (i965+ only) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b3196427840..c9fd9414826 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -195,7 +195,6 @@ void *brw_state_batch(struct brw_context *brw, uint32_t brw_state_batch_size(struct brw_context *brw, uint32_t offset); /* brw_wm_surface_state.c */ -void gen4_init_vtable_surface_functions(struct brw_context *brw); uint32_t brw_get_surface_tiling_bits(uint32_t tiling); uint32_t brw_get_surface_num_multisamples(unsigned num_samples); enum isl_format brw_isl_format_for_mesa_format(mesa_format mesa_format); @@ -247,9 +246,6 @@ void brw_emit_sampler_state(struct brw_context *brw, bool non_normalized_coordinates, uint32_t border_color_offset); -/* gen6_surface_state.c */ -void gen6_init_vtable_surface_functions(struct brw_context *brw); - /* brw_vs_surface_state.c */ void brw_upload_pull_constants(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index f63b9d2ed51..2d7de54dcdb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1000,11 +1000,12 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw, if (fb->_NumColorDrawBuffers >= 1) { for (i = 0; i < fb->_NumColorDrawBuffers; i++) { const uint32_t surf_index = render_target_start + i; + struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; -if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) { -surf_offset[surf_index] = - brw->vtbl.update_renderbuffer_surface( - brw, fb->_ColorDrawBuffers[i], i, surf_index); +if (intel_renderbuffer(rb)) { +surf_offset[surf_index] = brw->gen >= 6 ? + gen6_update_renderbuffer_surface(brw, rb, i, surf_index) : + gen4_update_renderbuffer_surface(brw, rb, i, surf_index); } else { emit_null_surface_state(brw, w, h, s, _offset[surf_index]); } @@ -1669,19 +1670,6 @@ const struct brw_tracked_state brw_wm_image_surfaces = { .emit = brw_upload_wm_image_surfaces, }; -void -gen4_init_vtable_surface_functions(struct brw_context *brw) -{ - brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface; -} - -void -gen6_init_vtable_surface_functions(struct brw_context *brw) -{ - gen4_init_vtable_surface_functions(brw); - brw->vtbl.update_renderbuffer_surface = gen6_update_renderbuffer_surface; -} - static void brw_upload_cs_work_groups_surface(struct brw_context *brw) { -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/7] i965: Stop using wm_prog_data->binding_table.render_target_start.
Render target surfaces always start at binding table index 0. This is required for us to use headerless FB writes, which we really want to do. So, we'll never change that. Given that, it's not necessary to look up a wm_prog_data field which we already know contains 0. We can drop the dependency in brw_renderbuffer_surfaces (Gen4-5)...which was already confusingly missing from gen6_renderbuffer_surfaces. --- src/intel/compiler/brw_fs_generator.cpp | 9 +++-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 10 +++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 2ade486705b..c101c4696ef 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -277,8 +277,13 @@ fs_generator::fire_fb_write(fs_inst *inst, else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; - uint32_t surf_index = - prog_data->binding_table.render_target_start + inst->target; + /* We assume render targets start at 0, because headerless FB write +* messages set "Render Target Index" to 0. Using a different binding +* table index would make it impossible to use headerless messages. +*/ + assert(prog_data->binding_table.render_target_start == 0); + + uint32_t surf_index = inst->target; bool last_render_target = inst->eot || (prog_data->dual_src_blend && dispatch_width == 16); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 5cfdbe58102..8c901df8e97 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -990,14 +990,11 @@ update_renderbuffer_surfaces(struct brw_context *brw) { const struct gl_context *ctx = >ctx; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - /* _NEW_BUFFERS | _NEW_COLOR */ const struct gl_framebuffer *fb = ctx->DrawBuffer; - const unsigned rt_start = wm_prog_data->binding_table.render_target_start; + /* Render targets always start at binding table index 0. */ + const unsigned rt_start = 0; uint32_t *surf_offsets = brw->wm.base.surf_offset; @@ -1025,8 +1022,7 @@ const struct brw_tracked_state brw_renderbuffer_surfaces = { .dirty = { .mesa = _NEW_BUFFERS | _NEW_COLOR, - .brw = BRW_NEW_BATCH | - BRW_NEW_FS_PROG_DATA, + .brw = BRW_NEW_BATCH, }, .emit = update_renderbuffer_surfaces, }; -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/7] i965: Make brw_update_renderbuffer_surface static.
Also rename it to gen6_update_renderbuffer_surface, as this is the function for Gen6+. Having functions named "brw_*" and "gen4_*" is confusing...if we're using gens, let's stick with those. --- src/mesa/drivers/dri/i965/brw_state.h| 5 - src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 12 ++-- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index dc893e5b7bd..b3196427840 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -222,11 +222,6 @@ void brw_update_texture_surface(struct gl_context *ctx, unsigned unit, uint32_t *surf_offset, bool for_gather, uint32_t plane); -uint32_t brw_update_renderbuffer_surface(struct brw_context *brw, - struct gl_renderbuffer *rb, - uint32_t flags, unsigned unit, - uint32_t surf_index); - void brw_update_renderbuffer_surfaces(struct brw_context *brw, const struct gl_framebuffer *fb, uint32_t render_target_start, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 358fdb48d44..8d6330032f9 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -206,11 +206,11 @@ brw_emit_surface_state(struct brw_context *brw, } } -uint32_t -brw_update_renderbuffer_surface(struct brw_context *brw, -struct gl_renderbuffer *rb, -uint32_t flags, unsigned unit, -uint32_t surf_index) +static uint32_t +gen6_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + uint32_t flags, unsigned unit, + uint32_t surf_index) { struct gl_context *ctx = >ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); @@ -1695,7 +1695,7 @@ void gen6_init_vtable_surface_functions(struct brw_context *brw) { gen4_init_vtable_surface_functions(brw); - brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface; + brw->vtbl.update_renderbuffer_surface = gen6_update_renderbuffer_surface; } static void -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/7] i965: Pass fb into emit_null_surface instead of dimensions.
We either want the framebuffer dimensions or 1x1x1. Passing fb and falling back to 1x1x1 lets us shorten some calls. --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 28 ++-- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 2d7de54dcdb..a0cb566e719 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -824,9 +824,7 @@ const struct brw_tracked_state brw_wm_pull_constants = { */ static void emit_null_surface_state(struct brw_context *brw, -unsigned width, -unsigned height, -unsigned samples, +const struct gl_framebuffer *fb, uint32_t *out_offset) { uint32_t *surf = brw_state_batch(brw, @@ -834,6 +832,11 @@ emit_null_surface_state(struct brw_context *brw, brw->isl_dev.ss.align, out_offset); + /* Use the fb dimensions or 1x1x1 */ + unsigned width = fb ? _mesa_geometric_width(fb) : 1; + unsigned height = fb ? _mesa_geometric_height(fb) : 1; + unsigned samples = fb ? _mesa_geometric_samples(fb) : 1; + if (brw->gen != 6 || samples <= 1) { isl_null_fill_state(>isl_dev, surf, isl_extent3d(width, height, 1)); @@ -992,9 +995,6 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw, uint32_t *surf_offset) { GLuint i; - const unsigned int w = _mesa_geometric_width(fb); - const unsigned int h = _mesa_geometric_height(fb); - const unsigned int s = _mesa_geometric_samples(fb); /* Update surfaces for drawing buffers */ if (fb->_NumColorDrawBuffers >= 1) { @@ -1007,12 +1007,12 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw, gen6_update_renderbuffer_surface(brw, rb, i, surf_index) : gen4_update_renderbuffer_surface(brw, rb, i, surf_index); } else { -emit_null_surface_state(brw, w, h, s, _offset[surf_index]); +emit_null_surface_state(brw, fb, _offset[surf_index]); } } } else { const uint32_t surf_index = render_target_start; - emit_null_surface_state(brw, w, h, s, _offset[surf_index]); + emit_null_surface_state(brw, fb, _offset[surf_index]); } } @@ -1117,11 +1117,7 @@ update_renderbuffer_read_surfaces(struct brw_context *brw) 0); } else { -emit_null_surface_state(brw, -_mesa_geometric_width(fb), -_mesa_geometric_height(fb), -_mesa_geometric_samples(fb), -surf_offset); +emit_null_surface_state(brw, fb, surf_offset); } } @@ -1294,7 +1290,7 @@ brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog, >UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding]; if (binding->BufferObject == ctx->Shared->NullBufferObj) { - emit_null_surface_state(brw, 1, 1, 1, _surf_offsets[i]); + emit_null_surface_state(brw, NULL, _surf_offsets[i]); } else { struct intel_buffer_object *intel_bo = intel_buffer_object(binding->BufferObject); @@ -1319,7 +1315,7 @@ brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog, >ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding]; if (binding->BufferObject == ctx->Shared->NullBufferObj) { - emit_null_surface_state(brw, 1, 1, 1, _surf_offsets[i]); + emit_null_surface_state(brw, NULL, _surf_offsets[i]); } else { struct intel_buffer_object *intel_bo = intel_buffer_object(binding->BufferObject); @@ -1611,7 +1607,7 @@ update_image_surface(struct brw_context *brw, } } else { - emit_null_surface_state(brw, 1, 1, 1, surf_offset); + emit_null_surface_state(brw, NULL, surf_offset); update_default_image_param(brw, u, surface_idx, param); } } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/7] i965: Delete update_renderbuffer_surface flags.
We don't need yet another set of flags. The function already has access to both brw and the unit, so it can check brw->draw_aux_buffer_disabled itself in one line of code. The layered flag was only used to assert that Gen4-5 doesn't do layered rendering, which isn't that useful. --- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 24 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 932240bfc50..94c0a1b9636 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -635,7 +635,7 @@ struct brw_context { uint32_t (*update_renderbuffer_surface)(struct brw_context *brw, struct gl_renderbuffer *rb, - uint32_t flags, unsigned unit, + unsigned unit, uint32_t surf_index); /** diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 8d6330032f9..f63b9d2ed51 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -55,11 +55,6 @@ #include "brw_defines.h" #include "brw_wm.h" -enum { - INTEL_RENDERBUFFER_LAYERED = 1 << 0, - INTEL_AUX_BUFFER_DISABLED = 1 << 1, -}; - uint32_t tex_mocs[] = { [7] = GEN7_MOCS_L3, [8] = BDW_MOCS_WB, @@ -209,7 +204,7 @@ brw_emit_surface_state(struct brw_context *brw, static uint32_t gen6_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, - uint32_t flags, unsigned unit, + unsigned unit, uint32_t surf_index) { struct gl_context *ctx = >ctx; @@ -217,14 +212,10 @@ gen6_update_renderbuffer_surface(struct brw_context *brw, struct intel_mipmap_tree *mt = irb->mt; enum isl_aux_usage aux_usage = + brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE : intel_miptree_render_aux_usage(brw, mt, ctx->Color.sRGBEnabled, ctx->Color.BlendEnabled & (1 << unit)); - if (flags & INTEL_AUX_BUFFER_DISABLED) { - assert(brw->gen >= 9); - aux_usage = ISL_AUX_USAGE_NONE; - } - assert(brw_render_target_supported(brw, rb)); mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); @@ -897,7 +888,7 @@ emit_null_surface_state(struct brw_context *brw, static uint32_t gen4_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, - uint32_t flags, unsigned unit, + unsigned unit, uint32_t surf_index) { struct gl_context *ctx = >ctx; @@ -911,9 +902,6 @@ gen4_update_renderbuffer_surface(struct brw_context *brw, mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); /* BRW_NEW_FS_PROG_DATA */ - assert(!(flags & INTEL_RENDERBUFFER_LAYERED)); - assert(!(flags & INTEL_AUX_BUFFER_DISABLED)); - if (rb->TexImage && !brw->has_surface_tile_offset) { intel_renderbuffer_get_tile_offsets(irb, _x, _y); @@ -1012,15 +1000,11 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw, if (fb->_NumColorDrawBuffers >= 1) { for (i = 0; i < fb->_NumColorDrawBuffers; i++) { const uint32_t surf_index = render_target_start + i; - const int flags = (_mesa_geometric_layers(fb) > 0 ? - INTEL_RENDERBUFFER_LAYERED : 0) | - (brw->draw_aux_buffer_disabled[i] ? - INTEL_AUX_BUFFER_DISABLED : 0); if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) { surf_offset[surf_index] = brw->vtbl.update_renderbuffer_surface( - brw, fb->_ColorDrawBuffers[i], flags, i, surf_index); + brw, fb->_ColorDrawBuffers[i], i, surf_index); } else { emit_null_surface_state(brw, w, h, s, _offset[surf_index]); } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/7] i965: Inline brw_update_renderbuffer_surfaces().
Less baklava layers. --- src/mesa/drivers/dri/i965/brw_state.h| 5 --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 53 +--- 2 files changed, 20 insertions(+), 38 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index c9fd9414826..a07e70341df 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -221,11 +221,6 @@ void brw_update_texture_surface(struct gl_context *ctx, unsigned unit, uint32_t *surf_offset, bool for_gather, uint32_t plane); -void brw_update_renderbuffer_surfaces(struct brw_context *brw, - const struct gl_framebuffer *fb, - uint32_t render_target_start, - uint32_t *surf_offset); - /* brw_sampler_state.c */ void brw_emit_sampler_state(struct brw_context *brw, uint32_t *sampler_state, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index a0cb566e719..dc1a770a5d3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -985,52 +985,39 @@ gen4_update_renderbuffer_surface(struct brw_context *brw, return offset; } -/** - * Construct SURFACE_STATE objects for renderbuffers/draw buffers. - */ -void -brw_update_renderbuffer_surfaces(struct brw_context *brw, - const struct gl_framebuffer *fb, - uint32_t render_target_start, - uint32_t *surf_offset) +static void +update_renderbuffer_surfaces(struct brw_context *brw) { - GLuint i; + const struct gl_context *ctx = >ctx; + + /* BRW_NEW_FS_PROG_DATA */ + const struct brw_wm_prog_data *wm_prog_data = + brw_wm_prog_data(brw->wm.base.prog_data); + + /* _NEW_BUFFERS | _NEW_COLOR */ + const struct gl_framebuffer *fb = ctx->DrawBuffer; + + const unsigned rt_start = wm_prog_data->binding_table.render_target_start; + + uint32_t *surf_offsets = brw->wm.base.surf_offset; /* Update surfaces for drawing buffers */ if (fb->_NumColorDrawBuffers >= 1) { - for (i = 0; i < fb->_NumColorDrawBuffers; i++) { - const uint32_t surf_index = render_target_start + i; + for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; if (intel_renderbuffer(rb)) { -surf_offset[surf_index] = brw->gen >= 6 ? - gen6_update_renderbuffer_surface(brw, rb, i, surf_index) : - gen4_update_renderbuffer_surface(brw, rb, i, surf_index); +surf_offsets[rt_start + i] = brw->gen >= 6 ? + gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) : + gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i); } else { -emit_null_surface_state(brw, fb, _offset[surf_index]); +emit_null_surface_state(brw, fb, _offsets[rt_start + i]); } } } else { - const uint32_t surf_index = render_target_start; - emit_null_surface_state(brw, fb, _offset[surf_index]); + emit_null_surface_state(brw, fb, _offsets[rt_start]); } -} - -static void -update_renderbuffer_surfaces(struct brw_context *brw) -{ - const struct gl_context *ctx = >ctx; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - /* _NEW_BUFFERS | _NEW_COLOR */ - const struct gl_framebuffer *fb = ctx->DrawBuffer; - brw_update_renderbuffer_surfaces( - brw, fb, - wm_prog_data->binding_table.render_target_start, - brw->wm.base.surf_offset); brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] i965: Issue performance warnings when growing the program cache
This involves a bunch of unnecessary copying, a batch flush, and state re-emission. --- src/mesa/drivers/dri/i965/brw_program_cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c index 4dcfd5234df..e9706be8961 100644 --- a/src/mesa/drivers/dri/i965/brw_program_cache.c +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c @@ -217,6 +217,9 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) struct brw_context *brw = cache->brw; struct brw_bo *new_bo; + perf_debug("Copying to larger program cache: %zu kB -> %u kB\n", + cache->bo->size / 1024, new_size / 1024); + new_bo = brw_bo_alloc(brw->bufmgr, "program cache", new_size, 64); if (can_do_exec_capture(brw->screen)) new_bo->kflags = EXEC_OBJECT_CAPTURE; -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] TGSI 16-bit support
Am 22.08.2017 um 19:10 schrieb Marek Olšák: > Hi, > > I'd like to discuss 16-bit float and integer support in TGSI. I'm > proposing this: > > struct tgsi_instruction > { > unsigned Type : 4; /* TGSI_TOKEN_TYPE_INSTRUCTION */ > unsigned NrTokens : 8; /* UINT */ > unsigned Opcode : 8; /* TGSI_OPCODE_ */ > unsigned Saturate : 1; /* BOOL */ > unsigned NumDstRegs : 2; /* UINT */ > unsigned NumSrcRegs : 4; /* UINT */ > unsigned Label : 1; > unsigned Texture: 1; > unsigned Memory : 1; > unsigned Precise: 1; > - unsigned Padding: 1; > + unsigned HalfPrecision : 1; > }; > > There won't be any 16-bit TEMPs in TGSI, but each instruction will > have the HalfPrecision flag, which is a hint for drivers that they can > use a 16-bit opcode. Even texture, load, and store instructions can > set HalfPrecision, which means they can accept and return 16-bit > values. > > The catch is that drivers will have to insert 16-bit <-> 32-bit > conversions manually, because they won't be present in TGSI. The > advantage is that we don't have to add 200 new opcodes for the 3 new > 16-bit types. > > What do you think? > Flagging instructions as 16bit doesn't look too bad to me, but I'm wondering if this isn't a bit problematic wrt register files. Clearly, this is a restriction of tgsi "everything is a 32x4 value". Doubles, of course, have a similar problem, but in the end they still have well-defined interactions with the register files, because it's defined what bits ultimately represent a 64bit value (at least in theory from tgsi's point of view, it is perfectly valid to use some 32bit calculations to set some reg, then just use double instructions directly without conversion on these values - it may not be meaningful but it is well defined). But it looks like you want to avoid to have a well-defined mapping of the registers to 16bit types (and with 16 bits instruction just being hints, I can't see how it could exist). Note that being able to flag instructions as HalfPrecision does not necessarily mean you can't have any explicit 16bit conversion instructions too. Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/blorp: Shrink the size of brw_blorp_blit_prog_key.
This shrinks the key from 64 bytes to 20 bytes. --- src/intel/blorp/blorp_priv.h | 44 ++-- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/intel/blorp/blorp_priv.h b/src/intel/blorp/blorp_priv.h index 81bf8c66c66..f96a5e0e5ad 100644 --- a/src/intel/blorp/blorp_priv.h +++ b/src/intel/blorp/blorp_priv.h @@ -221,20 +221,20 @@ struct brw_blorp_blit_prog_key /* Number of samples per pixel that have been configured in the surface * state for texturing from. */ - unsigned tex_samples; + uint8_t tex_samples; /* MSAA layout that has been configured in the surface state for texturing * from. */ - enum isl_msaa_layout tex_layout; + enum isl_msaa_layout tex_layout:8; - enum isl_aux_usage tex_aux_usage; + enum isl_aux_usage tex_aux_usage:8; /* Actual number of samples per pixel in the source image. */ - unsigned src_samples; + uint8_t src_samples; /* Actual MSAA layout used by the source image. */ - enum isl_msaa_layout src_layout; + enum isl_msaa_layout src_layout:8; /* Number of bits per channel in the source image. */ uint8_t src_bpc; @@ -245,16 +245,16 @@ struct brw_blorp_blit_prog_key /* Number of samples per pixel that have been configured in the render * target. */ - unsigned rt_samples; + uint8_t rt_samples; /* MSAA layout that has been configured in the render target. */ - enum isl_msaa_layout rt_layout; + enum isl_msaa_layout rt_layout:8; /* Actual number of samples per pixel in the destination image. */ - unsigned dst_samples; + uint8_t dst_samples; /* Actual MSAA layout used by the destination image. */ - enum isl_msaa_layout dst_layout; + enum isl_msaa_layout dst_layout:8; /* Number of bits per channel in the destination image. */ uint8_t dst_bpc; @@ -262,65 +262,65 @@ struct brw_blorp_blit_prog_key /* Type of the data to be read from the texture (one of * nir_type_(int|uint|float)). */ - nir_alu_type texture_data_type; + nir_alu_type texture_data_type:8; /* True if the source image is W tiled. If true, the surface state for the * source image must be configured as Y tiled, and tex_samples must be 0. */ - bool src_tiled_w; + bool src_tiled_w:1; /* True if the destination image is W tiled. If true, the surface state * for the render target must be configured as Y tiled, and rt_samples must * be 0. */ - bool dst_tiled_w; + bool dst_tiled_w:1; /* True if the destination is an RGB format. If true, the surface state * for the render target must be configured as red with three times the * normal width. We need to do this because you cannot render to * non-power-of-two formats. */ - bool dst_rgb; + bool dst_rgb:1; /* True if all source samples should be blended together to produce each * destination pixel. If true, src_tiled_w must be false, tex_samples must * equal src_samples, and tex_samples must be nonzero. */ - bool blend; + bool blend:1; /* True if the rectangle being sent through the rendering pipeline might be * larger than the destination rectangle, so the WM program should kill any * pixels that are outside the destination rectangle. */ - bool use_kill; + bool use_kill:1; /** * True if the WM program should be run in MSDISPMODE_PERSAMPLE with more * than one sample per pixel. */ - bool persample_msaa_dispatch; + bool persample_msaa_dispatch:1; /* True for scaled blitting. */ - bool blit_scaled; + bool blit_scaled:1; /* True if this blit operation may involve intratile offsets on the source. * In this case, we need to add the offset before texturing. */ - bool need_src_offset; + bool need_src_offset:1; /* True if this blit operation may involve intratile offsets on the * destination. In this case, we need to add the offset to gl_FragCoord. */ - bool need_dst_offset; + bool need_dst_offset:1; + + /* True for blits with filter = GL_LINEAR. */ + bool bilinear_filter:1; /* Scale factors between the pixel grid and the grid of samples. We're * using grid of samples for bilinear filetring in multisample scaled blits. */ float x_scale; float y_scale; - - /* True for blits with filter = GL_LINEAR. */ - bool bilinear_filter; }; /** -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] vbo: fix glVertexAttrib(index=0)
Depending on which extension or GL spec you read the behavior of glVertexAttrib(index=0) either sets the current value for generic attribute 0, or it emits a vertex just like glVertex(). I believe it should do either, depending on context (see below). The piglit gl-2.0-vertex-const-attr test declares two vertex attributes: attribute vec2 vertex; attribute vec4 attr; and the GLSL linker assigns "vertex" to location 0 and "attr" to location 1. The test passes. But if the declarations were reversed such that "attr" was location 0 and "vertex" was location 1, the test would fail to draw properly. The problem is the call to glVertexAttrib(index=0) to set attr's value was interpreted as glVertex() and did not set generic attribute[0]'s value. Interesting, calling glVertex() outside glBegin/End (which is effectively what the piglit test does) does not generate a GL error. I believe the behavior of glVertexAttrib(index=0) should depend on whether it's called inside or outside of glBegin/glEnd(). If inside glBegin/End(), it should act like glVertex(). Else, it should behave like glVertexAttrib(index > 0). This seems to be what NVIDIA does. This patch makes two changes: 1. Check if we're inside glBegin/End for glVertexAttrib() 2. Fix the vertex array binding for recalculate_input_bindings(). As it was, we were using >currval[VBO_ATTRIB_POS], but that's interpreted as a zero-stride attribute and doesn't make sense for array drawing. No Piglit regressions. Fixes updated gl-2.0-vertex-const-attr test and passes new gl-2.0-vertex-attrib-0 test. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101941 --- src/mesa/vbo/vbo_attrib_tmp.h | 7 +-- src/mesa/vbo/vbo_exec_array.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mesa/vbo/vbo_attrib_tmp.h b/src/mesa/vbo/vbo_attrib_tmp.h index 5718ac5..126e4ef 100644 --- a/src/mesa/vbo/vbo_attrib_tmp.h +++ b/src/mesa/vbo/vbo_attrib_tmp.h @@ -524,15 +524,18 @@ TAG(MultiTexCoord4fv)(GLenum target, const GLfloat * v) /** * If index=0, does glVertexAttrib*() alias glVertex() to emit a vertex? + * It depends on a few things, including whether we're inside or outside + * of glBegin/glEnd. */ static inline bool is_vertex_position(const struct gl_context *ctx, GLuint index) { - return index == 0 && _mesa_attr_zero_aliases_vertex(ctx); + return (index == 0 && + _mesa_attr_zero_aliases_vertex(ctx) && + _mesa_inside_begin_end(ctx)); } - static void GLAPIENTRY TAG(VertexAttrib1fARB)(GLuint index, GLfloat x) { diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index edd55ce..e3421fa 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -356,7 +356,7 @@ recalculate_input_bindings(struct gl_context *ctx) else if (array[VERT_ATTRIB_POS].Enabled) inputs[0] = [VERT_ATTRIB_POS]; else { -inputs[0] = >currval[VBO_ATTRIB_POS]; +inputs[0] = >currval[VBO_ATTRIB_GENERIC0]; const_inputs |= VERT_BIT_POS; } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102017] Wrong colours in Cities Skyline
https://bugs.freedesktop.org/show_bug.cgi?id=102017 --- Comment #11 from Thomas Jollans--- Created attachment 133679 --> https://bugs.freedesktop.org/attachment.cgi?id=133679=edit in-game data view has correct colours -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102017] Wrong colours in Cities Skyline
https://bugs.freedesktop.org/show_bug.cgi?id=102017 --- Comment #10 from Thomas Jollans--- Created attachment 133678 --> https://bugs.freedesktop.org/attachment.cgi?id=133678=edit in-game screenshot: blank surfaces -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102017] Wrong colours in Cities Skyline
https://bugs.freedesktop.org/show_bug.cgi?id=102017 --- Comment #9 from Thomas Jollans--- I'm having the same issue, with Ubuntu 17.04 stock mesa and r600 (HD 6870) It looks to me like an issue with textures not being drawn; Everything is displayed with nice reflective surfaces. In the in-game data views (where most surfaces are blank), the colours are largely fine, but there are some glitches in details (e.g. water, road bridges). I've just upgraded from Ubuntu 16.04 LTS. Before the upgrade, everything worked beautifully with the same game version. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 100613] Regression in Mesa 17 on s390x (zSystems)
https://bugs.freedesktop.org/show_bug.cgi?id=100613 --- Comment #43 from Ben Crocker--- (In reply to Ben Crocker from comment #42) > Created attachment 133677 [details] [review] > lp_build_gather_elem_vec big-endian fix for 3x16 load > > In reply to Roland's Comment 32: > > Roland, thanks for the constructive feedback. Here is what the patch > looks like now. > > I don't disagree that this is messy, but it DOES resolve (most of) the > Piglit regressions. > > I agree that the code in lp_bld_gather should work for 4x16 bit > vectors, but SOMETHING appears to have gone right already for 4x16 bit > vectors, as the only regressions seen had to do with 3x16 vectors. Clarification: the only regressions seen AFTER Ray Strode's patch (attachment 130980). -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/4] util/disk_cache: add struct cache_item_metadata
On 08/22/2017 02:40 AM, Michel Dänzer wrote: On 22/08/17 06:31 PM, Timothy Arceri wrote: On 22/08/17 18:02, Michel Dänzer wrote: On 22/08/17 10:02 AM, Pierre-Loup A. Griffais wrote: On 08/18/2017 04:02 AM, Nicolai Hähnle wrote: On 15.08.2017 01:26, Timothy Arceri wrote: This will be used to store more information about the cache item in it's header. This information is intended for 3rd party and cache analysis use but can also be used for detecting the unlikely scenario of cache collisions. --- src/util/disk_cache.h | 22 ++ 1 file changed, 22 insertions(+) diff --git a/src/util/disk_cache.h b/src/util/disk_cache.h index 9aade16a9e..d6d7609ffb 100644 --- a/src/util/disk_cache.h +++ b/src/util/disk_cache.h @@ -34,20 +34,42 @@ #ifdef __cplusplus extern "C" { #endif /* Size of cache keys in bytes. */ #define CACHE_KEY_SIZE 20 typedef uint8_t cache_key[CACHE_KEY_SIZE]; +/* WARNING: 3rd party applications might be reading the cache item metadata. + * Do not change these values without making the change widely known. + * Please contact Valve developers and make them aware of this change. + */ +#define CACHE_ITEM_TYPE_UNKNOWN 0x0 +#define CACHE_ITEM_TYPE_GLSL 0x1 + +typedef uint32_t cache_item_metadata_type; Please don't do typedefs like that, they just obfuscate. If you don't want to just use uint32_t, I'd suggest to use an enum. At a higher level, what is this actually good for? So I get Valve wants to parse something in the shader cache, but why / what for? Steam looks at local entries and delete leftover ones should you upgrade Mesa or your GPU. That's rather questionable. Steam should leave purging old cache entries to Mesa. My understanding is steam overrides the environment var so that each game with have its own local cache. If they don't get cleaned up it could potentially waste a significant amount of space. I see, thanks for the clarification. I thought Pierre-Loup was referring to the default Mesa shader cache. Yes, Steam only messes with the caches it's managing in the first place; it's in a good position to improve shader cache usage as it has a lot more context on games and where they live, and can run code at points Mesa cannot (like when you switch GPUs but never play a given game again, or a game is uninstalled, or before a game starts), so it can use that information to ensure optimal cache hit patterns. You can always opt out of any shader cache related shenanigans by setting this: STEAM_ENABLE_SHADER_CACHE_MANAGEMENT=0 Thanks, - Pierre-Loup ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 100613] Regression in Mesa 17 on s390x (zSystems)
https://bugs.freedesktop.org/show_bug.cgi?id=100613 Ben Crockerchanged: What|Removed |Added Attachment #131577|0 |1 is obsolete|| --- Comment #42 from Ben Crocker --- Created attachment 133677 --> https://bugs.freedesktop.org/attachment.cgi?id=133677=edit lp_build_gather_elem_vec big-endian fix for 3x16 load In reply to Roland's Comment 32: Roland, thanks for the constructive feedback. Here is what the patch looks like now. I don't disagree that this is messy, but it DOES resolve (most of) the Piglit regressions. I agree that the code in lp_bld_gather should work for 4x16 bit vectors, but SOMETHING appears to have gone right already for 4x16 bit vectors, as the only regressions seen had to do with 3x16 vectors. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] TGSI 16-bit support
On Tue, Aug 22, 2017 at 7:55 PM, Ilia Mirkinwrote: > On Tue, Aug 22, 2017 at 1:32 PM, Marek Olšák wrote: >> On Tue, Aug 22, 2017 at 7:28 PM, Ilia Mirkin wrote: >>> How do you propose defining the semantics for e.g. loading a 16-bit >>> value from a constbuf/ssbo? Would those get separate instructions? >> >> st/mesa should use UP2H, PK2H and similar opcodes for I16 and U16, and >> drivers can replace them with MOV if HalfPrecision == 1. > > The idea I had been toying with was to instead mark TEMP's as 16-bit > precision and letting ops like ADD infer whatever they need from that. > > I don't know whether that's any different from your suggestion, and if > it is, whether it's better or worse though. Just mentioning it. The question is which one is easier to implement? Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] TGSI 16-bit support
On Tue, Aug 22, 2017 at 1:32 PM, Marek Olšákwrote: > On Tue, Aug 22, 2017 at 7:28 PM, Ilia Mirkin wrote: >> How do you propose defining the semantics for e.g. loading a 16-bit >> value from a constbuf/ssbo? Would those get separate instructions? > > st/mesa should use UP2H, PK2H and similar opcodes for I16 and U16, and > drivers can replace them with MOV if HalfPrecision == 1. The idea I had been toying with was to instead mark TEMP's as 16-bit precision and letting ops like ADD infer whatever they need from that. I don't know whether that's any different from your suggestion, and if it is, whether it's better or worse though. Just mentioning it. Cheers, -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] TGSI 16-bit support
On Tue, Aug 22, 2017 at 7:28 PM, Ilia Mirkinwrote: > How do you propose defining the semantics for e.g. loading a 16-bit > value from a constbuf/ssbo? Would those get separate instructions? st/mesa should use UP2H, PK2H and similar opcodes for I16 and U16, and drivers can replace them with MOV if HalfPrecision == 1. Marek > > On Tue, Aug 22, 2017 at 1:10 PM, Marek Olšák wrote: >> Hi, >> >> I'd like to discuss 16-bit float and integer support in TGSI. I'm >> proposing this: >> >> struct tgsi_instruction >> { >> unsigned Type : 4; /* TGSI_TOKEN_TYPE_INSTRUCTION */ >> unsigned NrTokens : 8; /* UINT */ >> unsigned Opcode : 8; /* TGSI_OPCODE_ */ >> unsigned Saturate : 1; /* BOOL */ >> unsigned NumDstRegs : 2; /* UINT */ >> unsigned NumSrcRegs : 4; /* UINT */ >> unsigned Label : 1; >> unsigned Texture: 1; >> unsigned Memory : 1; >> unsigned Precise: 1; >> - unsigned Padding: 1; >> + unsigned HalfPrecision : 1; >> }; >> >> There won't be any 16-bit TEMPs in TGSI, but each instruction will >> have the HalfPrecision flag, which is a hint for drivers that they can >> use a 16-bit opcode. Even texture, load, and store instructions can >> set HalfPrecision, which means they can accept and return 16-bit >> values. >> >> The catch is that drivers will have to insert 16-bit <-> 32-bit >> conversions manually, because they won't be present in TGSI. The >> advantage is that we don't have to add 200 new opcodes for the 3 new >> 16-bit types. >> >> What do you think? >> >> Marek >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] TGSI 16-bit support
How do you propose defining the semantics for e.g. loading a 16-bit value from a constbuf/ssbo? Would those get separate instructions? On Tue, Aug 22, 2017 at 1:10 PM, Marek Olšákwrote: > Hi, > > I'd like to discuss 16-bit float and integer support in TGSI. I'm > proposing this: > > struct tgsi_instruction > { > unsigned Type : 4; /* TGSI_TOKEN_TYPE_INSTRUCTION */ > unsigned NrTokens : 8; /* UINT */ > unsigned Opcode : 8; /* TGSI_OPCODE_ */ > unsigned Saturate : 1; /* BOOL */ > unsigned NumDstRegs : 2; /* UINT */ > unsigned NumSrcRegs : 4; /* UINT */ > unsigned Label : 1; > unsigned Texture: 1; > unsigned Memory : 1; > unsigned Precise: 1; > - unsigned Padding: 1; > + unsigned HalfPrecision : 1; > }; > > There won't be any 16-bit TEMPs in TGSI, but each instruction will > have the HalfPrecision flag, which is a hint for drivers that they can > use a 16-bit opcode. Even texture, load, and store instructions can > set HalfPrecision, which means they can accept and return 16-bit > values. > > The catch is that drivers will have to insert 16-bit <-> 32-bit > conversions manually, because they won't be present in TGSI. The > advantage is that we don't have to add 200 new opcodes for the 3 new > 16-bit types. > > What do you think? > > Marek > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/7 v2] configure.ac: Introduce HAVE_ARM_ASM/HAVE_AARCH64_ASM and the -D flags.
Emil Velikovwrites: > Hi Eric, > > On 10 August 2017 at 23:43, Eric Anholt wrote: > >> +aarch64) >> +DEFINES="$DEFINES -DUSE_AARCH64_ASM" > I cannot see any places where the define is used. > > Am I missing something or there isn't any? Do you have some WIP > patches that make use of it? There is no current user, but I think being consistent with the other asm defines/conditionals is best. signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] TGSI 16-bit support
Hi, I'd like to discuss 16-bit float and integer support in TGSI. I'm proposing this: struct tgsi_instruction { unsigned Type : 4; /* TGSI_TOKEN_TYPE_INSTRUCTION */ unsigned NrTokens : 8; /* UINT */ unsigned Opcode : 8; /* TGSI_OPCODE_ */ unsigned Saturate : 1; /* BOOL */ unsigned NumDstRegs : 2; /* UINT */ unsigned NumSrcRegs : 4; /* UINT */ unsigned Label : 1; unsigned Texture: 1; unsigned Memory : 1; unsigned Precise: 1; - unsigned Padding: 1; + unsigned HalfPrecision : 1; }; There won't be any 16-bit TEMPs in TGSI, but each instruction will have the HalfPrecision flag, which is a hint for drivers that they can use a 16-bit opcode. Even texture, load, and store instructions can set HalfPrecision, which means they can accept and return 16-bit values. The catch is that drivers will have to insert 16-bit <-> 32-bit conversions manually, because they won't be present in TGSI. The advantage is that we don't have to add 200 new opcodes for the 3 new 16-bit types. What do you think? Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/6] gallium/radeon: fix saving multi-part command streams
For the series: Reviewed-by: Marek OlšákMarek On Tue, Aug 22, 2017 at 5:45 PM, Nicolai Hähnle wrote: > From: Nicolai Hähnle > > Use the correct type to fix pointer arithmetic. > --- > src/gallium/drivers/radeon/r600_pipe_common.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c > b/src/gallium/drivers/radeon/r600_pipe_common.c > index 37c12dea374..7226fc21a04 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.c > +++ b/src/gallium/drivers/radeon/r600_pipe_common.c > @@ -495,7 +495,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags, > void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, > struct radeon_saved_cs *saved, bool get_buffer_list) > { > - void *buf; > + uint32_t *buf; > unsigned i; > > /* Save the IB chunks. */ > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/5] tgsi: macro-ify the opcodes table
On 08/22/2017 10:35 AM, Nicolai Hähnle wrote: On 22.08.2017 18:33, Brian Paul wrote: On 08/22/2017 09:32 AM, Nicolai Hähnle wrote: From: Nicolai HähnleSo we can easily re-arrange members of tgsi_opcode_info, and readers of the code don't have to guess what all the 0s mean. Mostly done with regex search --- src/gallium/auxiliary/Makefile.sources | 1 + src/gallium/auxiliary/tgsi/tgsi_info.c | 262 ++--- src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h | 251 +++ 3 files changed, 263 insertions(+), 251 deletions(-) create mode 100644 src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 1b0eeb99581..36d6c8fb4f4 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -151,6 +151,7 @@ C_SOURCES := \ tgsi/tgsi_from_mesa.h \ tgsi/tgsi_info.c \ tgsi/tgsi_info.h \ +tgsi/tgsi_info_opcodes.h \ tgsi/tgsi_iterate.c \ tgsi/tgsi_iterate.h \ tgsi/tgsi_lowering.c \ diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 472c088c7de..5112826aafb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -35,261 +35,21 @@ #define CHAN TGSI_OUTPUT_CHAN_DEPENDENT #define OTHR TGSI_OUTPUT_OTHER -static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = -{ - { 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX_LZ", TGSI_OPCODE_TEX_LZ }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, - { 1, 3, 0, 0, 0, 0, 0, REPL, "", 21 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF_LZ", TGSI_OPCODE_TXF_LZ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "", 31 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "", 35 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2U64", TGSI_OPCODE_D2U64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2I64", TGSI_OPCODE_D2I64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U642D", TGSI_OPCODE_U642D }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B",
Re: [Mesa-dev] [PATCH 3/5] tgsi: macro-ify the opcodes table
On 22.08.2017 18:33, Brian Paul wrote: On 08/22/2017 09:32 AM, Nicolai Hähnle wrote: From: Nicolai HähnleSo we can easily re-arrange members of tgsi_opcode_info, and readers of the code don't have to guess what all the 0s mean. Mostly done with regex search --- src/gallium/auxiliary/Makefile.sources | 1 + src/gallium/auxiliary/tgsi/tgsi_info.c | 262 ++--- src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h | 251 +++ 3 files changed, 263 insertions(+), 251 deletions(-) create mode 100644 src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 1b0eeb99581..36d6c8fb4f4 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -151,6 +151,7 @@ C_SOURCES := \ tgsi/tgsi_from_mesa.h \ tgsi/tgsi_info.c \ tgsi/tgsi_info.h \ +tgsi/tgsi_info_opcodes.h \ tgsi/tgsi_iterate.c \ tgsi/tgsi_iterate.h \ tgsi/tgsi_lowering.c \ diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 472c088c7de..5112826aafb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -35,261 +35,21 @@ #define CHAN TGSI_OUTPUT_CHAN_DEPENDENT #define OTHR TGSI_OUTPUT_OTHER -static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = -{ - { 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX_LZ", TGSI_OPCODE_TEX_LZ }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, - { 1, 3, 0, 0, 0, 0, 0, REPL, "", 21 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF_LZ", TGSI_OPCODE_TXF_LZ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "", 31 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "", 35 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2U64", TGSI_OPCODE_D2U64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2I64", TGSI_OPCODE_D2I64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U642D", TGSI_OPCODE_U642D }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B }, - { 1, 1, 0, 0, 0, 0,
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
Am 22.08.2017 um 16:56 schrieb Ilia Mirkin: > On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger> wrote: >> I am probably missing something here, but why do you need a new register >> file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't >> you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing? >> Or do you need to know how it's going to be accessed in advance? > > With bindless, LOAD can take a CONST I believe [which contains the > value of the bindless id]. I think it's nice to keep those concepts > separate... having CONST sometimes mean the value and other times mean > the address is a bit weird. This way CONSTBUF[0] is the address of the > 0th constbuf. Ok, makes sense then. Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/5] tgsi: macro-ify the opcodes table
On 08/22/2017 09:32 AM, Nicolai Hähnle wrote: From: Nicolai HähnleSo we can easily re-arrange members of tgsi_opcode_info, and readers of the code don't have to guess what all the 0s mean. Mostly done with regex search --- src/gallium/auxiliary/Makefile.sources | 1 + src/gallium/auxiliary/tgsi/tgsi_info.c | 262 ++--- src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h | 251 +++ 3 files changed, 263 insertions(+), 251 deletions(-) create mode 100644 src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 1b0eeb99581..36d6c8fb4f4 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -151,6 +151,7 @@ C_SOURCES := \ tgsi/tgsi_from_mesa.h \ tgsi/tgsi_info.c \ tgsi/tgsi_info.h \ + tgsi/tgsi_info_opcodes.h \ tgsi/tgsi_iterate.c \ tgsi/tgsi_iterate.h \ tgsi/tgsi_lowering.c \ diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 472c088c7de..5112826aafb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -35,261 +35,21 @@ #define CHAN TGSI_OUTPUT_CHAN_DEPENDENT #define OTHR TGSI_OUTPUT_OTHER -static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = -{ - { 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX_LZ", TGSI_OPCODE_TEX_LZ }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, - { 1, 3, 0, 0, 0, 0, 0, REPL, "", 21 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF_LZ", TGSI_OPCODE_TXF_LZ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "", 31 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "", 35 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2U64", TGSI_OPCODE_D2U64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2I64", TGSI_OPCODE_D2I64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U642D", TGSI_OPCODE_U642D }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB",
[Mesa-dev] [PATCH v2] Android: gallium_dri: pass dri.sym to linker
Pass the dri.sym version script to the linker. This ensures only explicitly exported symbols are exported and shrinks the library by up to 60KB. HAVE_DLADDR also needs to be set so that __driDriverExtensions is defined. We need to pass "--undefined-version" because the Android build system sets --no-undefined-version by default and we get an error on driver specific symbols if those drivers are disabled without the option. Suggested-by: Emil VelikovReviewed-by: Emil Velikov Signed-off-by: Rob Herring --- Android.common.mk | 1 + src/gallium/targets/dri/Android.mk | 7 +++ 2 files changed, 8 insertions(+) diff --git a/Android.common.mk b/Android.common.mk index d29e2e29d6f6..b5942add7678 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -66,6 +66,7 @@ LOCAL_CFLAGS += \ -DHAVE___BUILTIN_CLZLL \ -DHAVE___BUILTIN_UNREACHABLE \ -DHAVE_PTHREAD=1 \ + -DHAVE_DLADDR \ -DHAVE_DLOPEN \ -DHAVE_DL_ITERATE_PHDR \ -DMAJOR_IN_SYSMACROS \ diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 150b2e368e51..2218f7869ce6 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -32,6 +32,13 @@ LOCAL_SRC_FILES := target.c LOCAL_CFLAGS := +# We need --undefined-version as some functions in dri.sym may be missing +# depending on which drivers are enabled or not. Otherwise, we get the error: +# "version script assignment of to symbol FOO failed: symbol not defined" +LOCAL_LDFLAGS := \ + -Wl,--version-script=$(LOCAL_PATH)/dri.sym \ + -Wl,--undefined-version + LOCAL_SHARED_LIBRARIES := \ libdl \ liblog \ -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/5] tgsi: store opcode mnemonics in a separate table
For the series: Reviewed-by: Marek OlšákMarek On Tue, Aug 22, 2017 at 5:32 PM, Nicolai Hähnle wrote: > From: Nicolai Hähnle > > They are only used for debug info. > > Together with making tgsi_opcode_info::opcode a bitfield, this reduces > the size of tgsi_opcode_info on 64-bit systems from 24 bytes to 4 bytes, > and makes the whole data structure a bit more linker friendly. > --- > src/gallium/auxiliary/tgsi/tgsi_info.c | 19 +++ > src/gallium/auxiliary/tgsi/tgsi_info.h | 3 +-- > 2 files changed, 16 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c > b/src/gallium/auxiliary/tgsi/tgsi_info.c > index 5112826aafb..08bce6380c9 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_info.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c > @@ -36,11 +36,11 @@ > #define OTHR TGSI_OUTPUT_OTHER > > #define OPCODE(_num_dst, _num_src, _output_mode, name, ...) \ > - { .mnemonic = #name, .opcode = TGSI_OPCODE_ ## name, \ > + { .opcode = TGSI_OPCODE_ ## name, \ > .output_mode = _output_mode, .num_dst = _num_dst, .num_src = _num_src, \ > ##__VA_ARGS__ }, > > -#define OPCODE_GAP(opc) { .mnemonic = "", .opcode = opc }, > +#define OPCODE_GAP(opc) { .opcode = opc }, > > static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = > { > @@ -69,12 +69,23 @@ tgsi_get_opcode_info( uint opcode ) > return NULL; > } > > +#define OPCODE(_num_dst, _num_src, _output_mode, name, ...) #name, > +#define OPCODE_GAP(opc) "UNK" #opc, > + > +static const char * const opcode_names[TGSI_OPCODE_LAST] = > +{ > +#include "tgsi_info_opcodes.h" > +}; > + > +#undef OPCODE > +#undef OPCODE_GAP > > const char * > tgsi_get_opcode_name( uint opcode ) > { > - const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); > - return info->mnemonic; > + if (opcode >= ARRAY_SIZE(opcode_names)) > + return "UNK_OOB"; > + return opcode_names[opcode]; > } > > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h > b/src/gallium/auxiliary/tgsi/tgsi_info.h > index e65f7ac3b74..74bff186924 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_info.h > +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h > @@ -79,8 +79,7 @@ struct tgsi_opcode_info > unsigned pre_dedent:1; > unsigned post_indent:1; > enum tgsi_output_mode output_mode:3; > - const char *mnemonic; > - uint opcode; > + unsigned opcode:8; > }; > > const struct tgsi_opcode_info * > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] tgsi/scan: fix uses_double
On 08/22/2017 09:52 AM, Nicolai Hähnle wrote: On 22.08.2017 17:42, Marek Olšák wrote: From: Marek Olšák--- src/gallium/auxiliary/tgsi/tgsi_scan.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 2fd7d7c..db87ce3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -450,22 +450,28 @@ scan_instruction(struct tgsi_shader_info *info, info->uses_linear_opcode_interp_offset = TRUE; break; case TGSI_OPCODE_INTERP_SAMPLE: info->uses_linear_opcode_interp_sample = TRUE; break; } break; } } - if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && - fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) + if ((fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && +fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) || + fullinst->Instruction.Opcode == TGSI_OPCODE_DFMA || + fullinst->Instruction.Opcode == TGSI_OPCODE_DDIV || + fullinst->Instruction.Opcode == TGSI_OPCODE_D2U64 || + fullinst->Instruction.Opcode == TGSI_OPCODE_D2I64 || + fullinst->Instruction.Opcode == TGSI_OPCODE_U642D || + fullinst->Instruction.Opcode == TGSI_OPCODE_I642D) info->uses_doubles = TRUE; I'd suggest creating a tgsi_uses_double(opcode) helper function. And use explicit switch cases instead of range checks just in case we ever renumber the opcodes. The compiler should optimize the switch cases accordingly. -Brian Nice. In a neat coincidence, this is precisely the kind of thing that can be done more cleanly on top of the tgsi_info series that I just sent out :) I suggest using that, but for now this approach is also Reviewed-by: Nicolai Hähnle for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { scan_src_operand(info, fullinst, >Src[i], i, tgsi_util_get_inst_usage_mask(fullinst, i), is_interp_instruction, _mem_inst); } if (fullinst->Instruction.Texture) { for (i = 0; i < fullinst->Texture.NumOffsets; i++) { ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Android: gallium_dri: pass dri.sym to linker
On 21 August 2017 at 20:33, Rob Herringwrote: > Pass the dri.sym version script to the linker. This ensures only > explicitly exported symbols are exported and shrinks the library by up > to 60KB. > > We need to pass "--undefined-version" because the Android build system > sets --no-undefined-version by default and we get an error on > __driDriverExtensions without the option. > > Suggested-by: Emil Velikov > Signed-off-by: Rob Herring > --- > src/gallium/targets/dri/Android.mk | 4 > 1 file changed, 4 insertions(+) > > diff --git a/src/gallium/targets/dri/Android.mk > b/src/gallium/targets/dri/Android.mk > index 150b2e368e51..313930b76274 100644 > --- a/src/gallium/targets/dri/Android.mk > +++ b/src/gallium/targets/dri/Android.mk > @@ -32,6 +32,10 @@ LOCAL_SRC_FILES := target.c > > LOCAL_CFLAGS := > > +LOCAL_LDFLAGS := \ > + -Wl,--version-script=$(LOCAL_PATH)/dri.sym \ > + -Wl,--undefined-version > + Can you please add an inline comment about undefined-version. I'm thinking of: "Depending on the build permutation, some of a function or two mentioned in the sym file may be missing. Which will lead to errors like the following unless --undefined-version is set. version script assignment of to symbol FOO failed: symbol not defined " With a bit of comment (regardless if you use my example or not) Reviewed-by: Emil Velikov Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] st/mesa: select Z16 for GL_DEPTH_COMPONENT
On Tue, Aug 22, 2017 at 5:09 PM, Nicolai Hähnlewrote: > Are you sure we really want this? I could well imagine badly coded > applications that don't explicitly specify the required Z depth, but will > have artifacts with 16 bits because they need the precision. It's something I noticed. I don't feel strongly about it. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallium/docs: Fix the math formula of U2I64
before: dst.xy = (uint64_t) src0.x dst.zw = (uint64_t) src0.y after: dst.xy = (int64_t) src0.x dst.zw = (int64_t) src0.y Signed-off-by: Mun Gwan-gyeong--- src/gallium/docs/source/tgsi.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index f9b1385e55..31331ef511 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -2199,9 +2199,9 @@ two-component vectors with 64-bits in each component. .. math:: - dst.xy = (uint64_t) src0.x + dst.xy = (int64_t) src0.x - dst.zw = (uint64_t) src0.y + dst.zw = (int64_t) src0.y .. opcode:: I2I64 - Signed Integer to 64-bit Integer -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] tgsi/scan: fix uses_double
On 22.08.2017 17:42, Marek Olšák wrote: From: Marek Olšák--- src/gallium/auxiliary/tgsi/tgsi_scan.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 2fd7d7c..db87ce3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -450,22 +450,28 @@ scan_instruction(struct tgsi_shader_info *info, info->uses_linear_opcode_interp_offset = TRUE; break; case TGSI_OPCODE_INTERP_SAMPLE: info->uses_linear_opcode_interp_sample = TRUE; break; } break; } } - if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && - fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) + if ((fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && +fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) || + fullinst->Instruction.Opcode == TGSI_OPCODE_DFMA || + fullinst->Instruction.Opcode == TGSI_OPCODE_DDIV || + fullinst->Instruction.Opcode == TGSI_OPCODE_D2U64 || + fullinst->Instruction.Opcode == TGSI_OPCODE_D2I64 || + fullinst->Instruction.Opcode == TGSI_OPCODE_U642D || + fullinst->Instruction.Opcode == TGSI_OPCODE_I642D) info->uses_doubles = TRUE; Nice. In a neat coincidence, this is precisely the kind of thing that can be done more cleanly on top of the tgsi_info series that I just sent out :) I suggest using that, but for now this approach is also Reviewed-by: Nicolai Hähnle for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { scan_src_operand(info, fullinst, >Src[i], i, tgsi_util_get_inst_usage_mask(fullinst, i), is_interp_instruction, _mem_inst); } if (fullinst->Instruction.Texture) { for (i = 0; i < fullinst->Texture.NumOffsets; i++) { -- Lerne, wie die Welt wirklich ist, Aber vergiss niemals, wie sie sein sollte. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] ac/debug: annotate IB dumps with the raw values
From: Nicolai Hähnle--- src/amd/common/ac_debug.c | 84 +-- 1 file changed, 66 insertions(+), 18 deletions(-) diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c index 518893ff481..e92dfbd0e4a 100644 --- a/src/amd/common/ac_debug.c +++ b/src/amd/common/ac_debug.c @@ -44,6 +44,7 @@ #define INDENT_PKT 8 struct ac_ib_parser { + FILE *f; uint32_t *ib; unsigned num_dw; int trace_id; @@ -144,10 +145,14 @@ void ac_dump_reg(FILE *file, unsigned offset, uint32_t value, static uint32_t ac_ib_get(struct ac_ib_parser *ib) { - uint32_t v = 0xdeadbeef; + uint32_t v = 0; - if (ib->cur_dw < ib->num_dw) + if (ib->cur_dw < ib->num_dw) { v = ib->ib[ib->cur_dw]; + fprintf(ib->f, "\n\035#%08x ", v); + } else { + fprintf(ib->f, "\n\035# "); + } ib->cur_dw++; return v; @@ -318,10 +323,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib) ac_dump_reg(f, R_370_CONTROL, ac_ib_get(ib), ~0); ac_dump_reg(f, R_371_DST_ADDR_LO, ac_ib_get(ib), ~0); ac_dump_reg(f, R_372_DST_ADDR_HI, ac_ib_get(ib), ~0); - for (i = 2; i < count; i++) { - print_spaces(f, INDENT_PKT); - fprintf(f, "0x%08x\n", ac_ib_get(ib)); - } + /* The payload is written automatically */ break; case PKT3_CP_DMA: ac_dump_reg(f, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0); @@ -369,9 +371,9 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib) ib_recurse.num_dw = G_3F2_IB_SIZE(control_dw); ib_recurse.cur_dw = 0; - fprintf(f, "-- nested begin --\n"); + fprintf(f, "\n\035>-- nested begin --\n"); ac_do_parse_ib(f, _recurse); - fprintf(f, "--- nested end ---\n"); + fprintf(f, "\n\035<--- nested end ---\n"); break; } case PKT3_CLEAR_STATE: @@ -417,13 +419,11 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib) } /* print additional dwords */ - while (ib->cur_dw <= first_dw + count) { - print_spaces(f, INDENT_PKT); - fprintf(f, "0x%08x\n", ac_ib_get(ib)); - } + while (ib->cur_dw <= first_dw + count) + ac_ib_get(ib); if (ib->cur_dw > first_dw + count + 1) - fprintf(f, COLOR_RED "! count in header too low !" + fprintf(f, COLOR_RED "\n! count in header too low !" COLOR_RESET "\n"); } @@ -449,13 +449,45 @@ static void ac_do_parse_ib(FILE *f, struct ac_ib_parser *ib) /* fall through */ default: fprintf(f, "Unknown packet type %i\n", type); - return; + break; } } +} - if (ib->cur_dw > ib->num_dw) { - printf("\nPacket ends after the end of IB.\n"); - exit(0); +static void format_ib_output(FILE *f, char *out) +{ + unsigned depth = 0; + + for (;;) { + char op = 0; + + if (out[0] == '\n' && out[1] == '\035') + out++; + if (out[0] == '\035') { + op = out[1]; + out += 2; + } + + if (op == '<') + depth--; + + unsigned indent = 4 * depth; + if (op != '#') + indent += 9; + + if (indent) + print_spaces(f, indent); + + char *end = strchrnul(out, '\n'); + fwrite(out, end - out, 1, f); + fputc('\n', f); /* always end with a new line */ + if (!*end) + break; + + out = end + 1; + + if (op == '>') + depth++; } } @@ -483,7 +515,23 @@ void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, int trace_id, ib.chip_class = chip_class; ib.addr_callback = addr_callback; ib.addr_callback_data = addr_callback_data; - ac_do_parse_ib(f, ); + + char *out; + size_t outsize; + FILE *memf = open_memstream(, ); + ib.f = memf; + ac_do_parse_ib(memf, ); + fclose(memf); + + if (out) { + format_ib_output(f, out); + free(out); + } + + if (ib.cur_dw > ib.num_dw) { + printf("\nPacket ends after the end of IB.\n"); +
[Mesa-dev] [PATCH 6/6] gallium/radeon: fix saving multi-part command streams
From: Nicolai HähnleUse the correct type to fix pointer arithmetic. --- src/gallium/drivers/radeon/r600_pipe_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 37c12dea374..7226fc21a04 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -495,7 +495,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags, void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, struct radeon_saved_cs *saved, bool get_buffer_list) { - void *buf; + uint32_t *buf; unsigned i; /* Save the IB chunks. */ -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] ac/debug: invoke valgrind checks while parsing IBs
From: Nicolai HähnleHelp catch garbage data written into IBs. --- src/amd/common/ac_debug.c | 20 1 file changed, 20 insertions(+) diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c index e92dfbd0e4a..2af83a146c8 100644 --- a/src/amd/common/ac_debug.c +++ b/src/amd/common/ac_debug.c @@ -26,6 +26,14 @@ #include "ac_debug.h" +#ifdef HAVE_VALGRIND +#include +#include +#define VG(x) x +#else +#define VG(x) +#endif + #include "sid.h" #include "gfx9d.h" #include "sid_tables.h" @@ -149,6 +157,18 @@ static uint32_t ac_ib_get(struct ac_ib_parser *ib) if (ib->cur_dw < ib->num_dw) { v = ib->ib[ib->cur_dw]; +#ifdef HAVE_VALGRIND + /* Help figure out where garbage data is written to IBs. +* +* Arguably we should do this already when the IBs are written, +* see RADEON_VALGRIND. The problem is that client-requests to +* Valgrind have an overhead even when Valgrind isn't running, +* and radeon_emit is performance sensitive... +*/ + if (VALGRIND_CHECK_VALUE_IS_DEFINED(v)) + fprintf(ib->f, COLOR_RED "Valgrind: The next DWORD is garbage" + COLOR_RESET "\n"); +#endif fprintf(ib->f, "\n\035#%08x ", v); } else { fprintf(ib->f, "\n\035# "); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] ac/debug: use an explicit getter for fetching words from the IB
From: Nicolai HähnleGuard against out-of-bounds accesses, and prepare for upcoming changes. --- src/amd/common/ac_debug.c | 368 +++--- 1 file changed, 215 insertions(+), 153 deletions(-) diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c index 42a72c086b1..518893ff481 100644 --- a/src/amd/common/ac_debug.c +++ b/src/amd/common/ac_debug.c @@ -43,6 +43,19 @@ #define INDENT_PKT 8 +struct ac_ib_parser { + uint32_t *ib; + unsigned num_dw; + int trace_id; + enum chip_class chip_class; + ac_debug_addr_callback addr_callback; + void *addr_callback_data; + + unsigned cur_dw; +}; + +static void ac_do_parse_ib(FILE *f, struct ac_ib_parser *ib); + static void print_spaces(FILE *f, unsigned num) { fprintf(f, "%*s", num, ""); @@ -129,11 +142,23 @@ void ac_dump_reg(FILE *file, unsigned offset, uint32_t value, fprintf(file, COLOR_YELLOW "0x%05x" COLOR_RESET " <- 0x%08x\n", offset, value); } -static void ac_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count, - unsigned reg_offset) +static uint32_t ac_ib_get(struct ac_ib_parser *ib) +{ + uint32_t v = 0xdeadbeef; + + if (ib->cur_dw < ib->num_dw) + v = ib->ib[ib->cur_dw]; + + ib->cur_dw++; + return v; +} + +static void ac_parse_set_reg_packet(FILE *f, unsigned count, unsigned reg_offset, + struct ac_ib_parser *ib) { - unsigned reg = ((ib[1] & 0x) << 2) + reg_offset; - unsigned index = ib[1] >> 28; + unsigned reg_dw = ac_ib_get(ib); + unsigned reg = ((reg_dw & 0x) << 2) + reg_offset; + unsigned index = reg_dw >> 28; int i; if (index != 0) { @@ -142,17 +167,15 @@ static void ac_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count, } for (i = 0; i < count; i++) - ac_dump_reg(f, reg + i*4, ib[2+i], ~0); + ac_dump_reg(f, reg + i*4, ac_ib_get(ib), ~0); } -static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw, - int trace_id, enum chip_class chip_class, - ac_debug_addr_callback addr_callback, - void *addr_callback_data) +static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib) { - unsigned count = PKT_COUNT_G(ib[0]); - unsigned op = PKT3_IT_OPCODE_G(ib[0]); - const char *predicate = PKT3_PREDICATE(ib[0]) ? "(predicate)" : ""; + unsigned first_dw = ib->cur_dw; + int count = PKT_COUNT_G(header); + unsigned op = PKT3_IT_OPCODE_G(header); + const char *predicate = PKT3_PREDICATE(header) ? "(predicate)" : ""; int i; /* Print the name first. */ @@ -179,180 +202,206 @@ static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw, /* Print the contents. */ switch (op) { case PKT3_SET_CONTEXT_REG: - ac_parse_set_reg_packet(f, ib, count, SI_CONTEXT_REG_OFFSET); + ac_parse_set_reg_packet(f, count, SI_CONTEXT_REG_OFFSET, ib); break; case PKT3_SET_CONFIG_REG: - ac_parse_set_reg_packet(f, ib, count, SI_CONFIG_REG_OFFSET); + ac_parse_set_reg_packet(f, count, SI_CONFIG_REG_OFFSET, ib); break; case PKT3_SET_UCONFIG_REG: - ac_parse_set_reg_packet(f, ib, count, CIK_UCONFIG_REG_OFFSET); + ac_parse_set_reg_packet(f, count, CIK_UCONFIG_REG_OFFSET, ib); break; case PKT3_SET_SH_REG: - ac_parse_set_reg_packet(f, ib, count, SI_SH_REG_OFFSET); + ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib); break; case PKT3_ACQUIRE_MEM: - ac_dump_reg(f, R_0301F0_CP_COHER_CNTL, ib[1], ~0); - ac_dump_reg(f, R_0301F4_CP_COHER_SIZE, ib[2], ~0); - ac_dump_reg(f, R_030230_CP_COHER_SIZE_HI, ib[3], ~0); - ac_dump_reg(f, R_0301F8_CP_COHER_BASE, ib[4], ~0); - ac_dump_reg(f, R_0301E4_CP_COHER_BASE_HI, ib[5], ~0); - print_named_value(f, "POLL_INTERVAL", ib[6], 16); + ac_dump_reg(f, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0); + ac_dump_reg(f, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0); + ac_dump_reg(f, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0); + ac_dump_reg(f, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0); + ac_dump_reg(f, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0); + print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16); break; case PKT3_SURFACE_SYNC: - if (chip_class >= CIK) { - ac_dump_reg(f, R_0301F0_CP_COHER_CNTL, ib[1], ~0); - ac_dump_reg(f,
[Mesa-dev] [PATCH 1/6] util: fix valgrind errors when dumping pipe_draw_info
From: Nicolai HähnleVarious index-related fields are only initialized when required, so they should only be dumped in those cases. --- src/gallium/auxiliary/util/u_dump_state.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index 70bbf5c9fad..c263021a9f6 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -919,9 +919,15 @@ util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state) util_dump_member(stream, uint, state, max_index); util_dump_member(stream, bool, state, primitive_restart); - util_dump_member(stream, uint, state, restart_index); - - util_dump_member(stream, ptr, state, index.resource); + if (state->primitive_restart) + util_dump_member(stream, uint, state, restart_index); + + if (state->index_size) { + if (state->has_user_indices) + util_dump_member(stream, ptr, state, index.user); + else + util_dump_member(stream, ptr, state, index.resource); + } util_dump_member(stream, ptr, state, count_from_stream_output); if (!state->indirect) { -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] radeonsi: update comment describing indices into sctx->descriptors
From: Nicolai Hähnle--- src/gallium/drivers/radeonsi/si_state.h | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index ca701658d0b..26c7b4ca9a3 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -195,13 +195,12 @@ enum { * are contiguous: * * 0 - rw buffers - * 1 - vertex const buffers - * 2 - vertex shader buffers - * ... - * 5 - fragment const buffers - * ... - * 21 - compute const buffers + * 1 - vertex const and shader buffers + * 2 - vertex samplers and images + * 3 - fragment const and shader buffer * ... + * 11 - compute const and shader buffers + * 12 - compute samplers and images */ enum { SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS, -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/6] util, ac, radeonsi: debug tools fixes and improvements
Hi all, The two main changes here are related to amd/common IB parsing: 1. Prefix IB dumps with the raw hex values in the IB. For when you distrust your debugging tools... I've gone back and forth on how to implement this. It's a pity that the stdlib has no way of "overloading" FILE, and I didn't want to write my own FILE proxy, so instead the code writes the dump to memory and adds custom "control codes" which are interpreted for formatting in a second pass. A side benefit is that nested IB dumping should look nicer now, but I haven't tested it. 2. Add explicit valgrind definedness checks. Valgrind will not complain about undefined data written to IBs by default, because it does not understand what happens with the data (it does not understand the ioctls, and has no chance whatsoever of understanding chained and nested IBs). If IB parsing is enabled, Valgrind *will* complain about undefined data, but that's written in the Valgrind log file. With this change, we additionally flag the situation in the IB dump itself, which makes it easier to understand where the undefined data is coming from. There are some related odds and ends, like proper bounds checking when reading from IBs, etc. Please review! Thanks, Nicolai -- src/amd/common/ac_debug.c| 446 +++-- src/gallium/auxiliary/util/u_dump_state.c| 12 +- .../drivers/radeon/r600_pipe_common.c| 2 +- src/gallium/drivers/radeonsi/si_state.h | 11 +- 4 files changed, 303 insertions(+), 168 deletions(-) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] tgsi/scan: fix uses_double
From: Marek Olšák--- src/gallium/auxiliary/tgsi/tgsi_scan.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 2fd7d7c..db87ce3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -450,22 +450,28 @@ scan_instruction(struct tgsi_shader_info *info, info->uses_linear_opcode_interp_offset = TRUE; break; case TGSI_OPCODE_INTERP_SAMPLE: info->uses_linear_opcode_interp_sample = TRUE; break; } break; } } - if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && - fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) + if ((fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && +fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) || + fullinst->Instruction.Opcode == TGSI_OPCODE_DFMA || + fullinst->Instruction.Opcode == TGSI_OPCODE_DDIV || + fullinst->Instruction.Opcode == TGSI_OPCODE_D2U64 || + fullinst->Instruction.Opcode == TGSI_OPCODE_D2I64 || + fullinst->Instruction.Opcode == TGSI_OPCODE_U642D || + fullinst->Instruction.Opcode == TGSI_OPCODE_I642D) info->uses_doubles = TRUE; for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { scan_src_operand(info, fullinst, >Src[i], i, tgsi_util_get_inst_usage_mask(fullinst, i), is_interp_instruction, _mem_inst); } if (fullinst->Instruction.Texture) { for (i = 0; i < fullinst->Texture.NumOffsets; i++) { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] gallium: use tgsi_get_opcode_name instead of tgsi_opcode_info::mnemonic
From: Nicolai Hähnle--- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 2 +- src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_dump.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_sanity.c| 6 -- src/gallium/auxiliary/tgsi/tgsi_text.c | 5 +++-- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index ebd4fe5f6a9..d7e92aa8387 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -531,7 +531,7 @@ lp_build_tgsi_llvm( tgsi_get_opcode_info(instr->Instruction.Opcode); if (!lp_build_tgsi_inst_llvm(bld_base, instr)) { _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - opcode_info->mnemonic); + tgsi_get_opcode_name(instr->Instruction.Opcode)); return FALSE; } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index b76c065e324..2529c6a8bf9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -956,7 +956,7 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm, tgsi_get_opcode_info(instr->Instruction.Opcode); if (!lp_emit_instruction_aos(, instr, opcode_info, )) _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - opcode_info->mnemonic); + tgsi_get_opcode_name(instr->Instruction.Opcode)); } if (0) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index b58e64511ce..f6c85390e90 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -578,7 +578,7 @@ iter_instruction( TXT( " " ); ctx->indent += info->post_indent; - TXT( info->mnemonic ); + TXT( tgsi_get_opcode_name(inst->Instruction.Opcode) ); if (inst->Instruction.Saturate) { TXT( "_SAT" ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index a95bbfa9880..2c9ad993331 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -326,10 +326,12 @@ iter_instruction( } if (info->num_dst != inst->Instruction.NumDstRegs) { - report_error( ctx, "%s: Invalid number of destination operands, should be %u", info->mnemonic, info->num_dst ); + report_error( ctx, "%s: Invalid number of destination operands, should be %u", +tgsi_get_opcode_name(inst->Instruction.Opcode), info->num_dst ); } if (info->num_src != inst->Instruction.NumSrcRegs) { - report_error( ctx, "%s: Invalid number of source operands, should be %u", info->mnemonic, info->num_src ); + report_error( ctx, "%s: Invalid number of source operands, should be %u", +tgsi_get_opcode_name(inst->Instruction.Opcode), info->num_src ); } /* Check destination and source registers' validity. diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 4cb67c5f063..02241a66bfe 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -1003,16 +1003,17 @@ match_inst(const char **pcur, const struct tgsi_opcode_info *info) { const char *cur = *pcur; + const char *mnemonic = tgsi_get_opcode_name(info->opcode); /* simple case: the whole string matches the instruction name */ - if (str_match_nocase_whole(, info->mnemonic)) { + if (str_match_nocase_whole(, mnemonic)) { *pcur = cur; *saturate = 0; *precise = 0; return TRUE; } - if (str_match_no_case(, info->mnemonic)) { + if (str_match_no_case(, mnemonic)) { /* the instruction has a suffix, figure it out */ if (str_match_no_case(, "_SAT")) { *pcur = cur; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] tgsi: remove post_indent from some 64-bit opcodes
From: Nicolai Hähnle--- src/gallium/auxiliary/tgsi/tgsi_info.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 0a82dbb14ca..472c088c7de 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -81,14 +81,14 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US }, { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "D2U64", TGSI_OPCODE_D2U64 }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "D2I64", TGSI_OPCODE_D2I64 }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "U642D", TGSI_OPCODE_U642D }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, @@ -96,10 +96,10 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B }, { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "U642F", TGSI_OPCODE_U642F }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "I642F", TGSI_OPCODE_I642F }, { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR }, - { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "I642D", TGSI_OPCODE_I642D }, { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL }, { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET }, { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG }, -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] tgsi: macro-ify the opcodes table
From: Nicolai HähnleSo we can easily re-arrange members of tgsi_opcode_info, and readers of the code don't have to guess what all the 0s mean. Mostly done with regex search --- src/gallium/auxiliary/Makefile.sources | 1 + src/gallium/auxiliary/tgsi/tgsi_info.c | 262 ++--- src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h | 251 +++ 3 files changed, 263 insertions(+), 251 deletions(-) create mode 100644 src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 1b0eeb99581..36d6c8fb4f4 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -151,6 +151,7 @@ C_SOURCES := \ tgsi/tgsi_from_mesa.h \ tgsi/tgsi_info.c \ tgsi/tgsi_info.h \ + tgsi/tgsi_info_opcodes.h \ tgsi/tgsi_iterate.c \ tgsi/tgsi_iterate.h \ tgsi/tgsi_lowering.c \ diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 472c088c7de..5112826aafb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -35,261 +35,21 @@ #define CHAN TGSI_OUTPUT_CHAN_DEPENDENT #define OTHR TGSI_OUTPUT_OTHER -static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = -{ - { 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX_LZ", TGSI_OPCODE_TEX_LZ }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, - { 1, 3, 0, 0, 0, 0, 0, REPL, "", 21 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF_LZ", TGSI_OPCODE_TXF_LZ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "", 31 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 }, - { 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 }, - { 1, 2, 0, 0, 0, 0, 0, REPL, "", 35 }, /* removed */ - { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2U64", TGSI_OPCODE_D2U64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "D2I64", TGSI_OPCODE_D2I64 }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U642D", TGSI_OPCODE_U642D }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B }, - { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB }, - { 1, 1, 0, 0, 0, 0, 0, COMP, "U642F",
[Mesa-dev] [PATCH 1/5] tgsi: reduce tgsi_opcode_info::pre_dedent and post_indent to 1 bit
From: Nicolai HähnleIt's not clear why they were ever 2 bits to begin with. Perhaps the original intent was to use signed values, but that doesn't seem to have ever been the case in master. --- src/gallium/auxiliary/tgsi/tgsi_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h index e60888fec8a..e65f7ac3b74 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.h +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -76,8 +76,8 @@ struct tgsi_opcode_info unsigned is_tex:1; unsigned is_store:1; unsigned is_branch:1; - int pre_dedent:2; - int post_indent:2; + unsigned pre_dedent:1; + unsigned post_indent:1; enum tgsi_output_mode output_mode:3; const char *mnemonic; uint opcode; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] tgsi: store opcode mnemonics in a separate table
From: Nicolai HähnleThey are only used for debug info. Together with making tgsi_opcode_info::opcode a bitfield, this reduces the size of tgsi_opcode_info on 64-bit systems from 24 bytes to 4 bytes, and makes the whole data structure a bit more linker friendly. --- src/gallium/auxiliary/tgsi/tgsi_info.c | 19 +++ src/gallium/auxiliary/tgsi/tgsi_info.h | 3 +-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 5112826aafb..08bce6380c9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -36,11 +36,11 @@ #define OTHR TGSI_OUTPUT_OTHER #define OPCODE(_num_dst, _num_src, _output_mode, name, ...) \ - { .mnemonic = #name, .opcode = TGSI_OPCODE_ ## name, \ + { .opcode = TGSI_OPCODE_ ## name, \ .output_mode = _output_mode, .num_dst = _num_dst, .num_src = _num_src, \ ##__VA_ARGS__ }, -#define OPCODE_GAP(opc) { .mnemonic = "", .opcode = opc }, +#define OPCODE_GAP(opc) { .opcode = opc }, static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { @@ -69,12 +69,23 @@ tgsi_get_opcode_info( uint opcode ) return NULL; } +#define OPCODE(_num_dst, _num_src, _output_mode, name, ...) #name, +#define OPCODE_GAP(opc) "UNK" #opc, + +static const char * const opcode_names[TGSI_OPCODE_LAST] = +{ +#include "tgsi_info_opcodes.h" +}; + +#undef OPCODE +#undef OPCODE_GAP const char * tgsi_get_opcode_name( uint opcode ) { - const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - return info->mnemonic; + if (opcode >= ARRAY_SIZE(opcode_names)) + return "UNK_OOB"; + return opcode_names[opcode]; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h index e65f7ac3b74..74bff186924 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.h +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -79,8 +79,7 @@ struct tgsi_opcode_info unsigned pre_dedent:1; unsigned post_indent:1; enum tgsi_output_mode output_mode:3; - const char *mnemonic; - uint opcode; + unsigned opcode:8; }; const struct tgsi_opcode_info * -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/5] tgsi: macro-ify the opcode info table
Hi all, The recent changes and required rebases reminded me that I had this series lying around :) Basically, I'd been annoyed by the format of the TGSI opcode info table for a long time, because it really discourages extensibility. This series cleans that up and then does some obvious micro- optimizations that the refactoring makes feasible. Please review! Thanks, Nicolai -- src/gallium/auxiliary/Makefile.sources | 1 + src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 2 +- .../auxiliary/gallivm/lp_bld_tgsi_aos.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_dump.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_info.c | 277 ++--- src/gallium/auxiliary/tgsi/tgsi_info.h | 7 +- .../auxiliary/tgsi/tgsi_info_opcodes.h | 251 +++ src/gallium/auxiliary/tgsi/tgsi_sanity.c | 6 +- src/gallium/auxiliary/tgsi/tgsi_text.c | 5 +- 9 files changed, 289 insertions(+), 264 deletions(-) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallium/docs: Add missing word "Not"
Signed-off-by: Mun Gwan-gyeong--- src/gallium/docs/source/tgsi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index b148c3c939..f9b1385e55 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -1762,7 +1762,7 @@ two-component vectors with doubled precision in each component. dst.z = src0.zw == src1.zw ? \sim 0 : 0 -.. opcode:: DSNE - Set on Equal +.. opcode:: DSNE - Set on Not Equal .. math:: -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] gallium/docs: improve docs for SAMPLE_POS, SAMPLE_INFO, TXQS, MSAA semantics
On Sun, Aug 20, 2017 at 12:32 AM, Roland Scheideggerwrote: > Am 19.08.2017 um 21:32 schrieb Marek Olšák: >> How about we remove all opcodes that are unused? Like: >> >> SAMPLE_POS >> SAMPLE_INFO >> SAMPLE >> SAMPLE_I >> SAMPLE_I_MS >> SAMPLE_B >> SAMPLE_C >> SAMPLE_C_LZ >> SAMPLE_D >> SAMPLE_L >> GATHER4 >> SVIEWINFO > These are all d3d10 opcodes, and we need them (llvmpipe supports all of > them with the exception of sample_pos and sample_info, right now). (It's SAMPLE_INFO is almost the same as TXQS and given the current state of driver support, it would be better to remove SAMPLE_INFO and keep TXQS. SAMPLE_INFO returns (samples, 0, 0, 0), while TXQS returns (samples, undef, undef, undef). There is also RESQ, which returns (w, h, d|layers, samples). Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] mapi/gen: remove shebang from the marshal generator scripts
With Eric's comment addressed, for the series: Reviewed-by: Nicolai HähnleOn 22.08.2017 12:39, Emil Velikov wrote: From: Emil Velikov The scripts are invoked with the correct version of python and are missing the execute bit. Follow the rest of Mesa and drop the shebang line. Signed-off-by: Emil Velikov --- src/mapi/glapi/gen/gl_marshal.py | 1 - src/mapi/glapi/gen/gl_marshal_h.py | 1 - src/mapi/glapi/gen/marshal_XML.py | 1 - 3 files changed, 3 deletions(-) diff --git a/src/mapi/glapi/gen/gl_marshal.py b/src/mapi/glapi/gen/gl_marshal.py index efa4d9e6f90..6a2c0d7b314 100644 --- a/src/mapi/glapi/gen/gl_marshal.py +++ b/src/mapi/glapi/gen/gl_marshal.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # Copyright (C) 2012 Intel Corporation # diff --git a/src/mapi/glapi/gen/gl_marshal_h.py b/src/mapi/glapi/gen/gl_marshal_h.py index 6e39148d29a..998ca5930d4 100644 --- a/src/mapi/glapi/gen/gl_marshal_h.py +++ b/src/mapi/glapi/gen/gl_marshal_h.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # Copyright (C) 2012 Intel Corporation # diff --git a/src/mapi/glapi/gen/marshal_XML.py b/src/mapi/glapi/gen/marshal_XML.py index 80f7f542e43..d761e58ce83 100644 --- a/src/mapi/glapi/gen/marshal_XML.py +++ b/src/mapi/glapi/gen/marshal_XML.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # Copyright (C) 2012 Intel Corporation # -- Lerne, wie die Welt wirklich ist, Aber vergiss niemals, wie sie sein sollte. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] st/mesa: select Z16 for GL_DEPTH_COMPONENT
Are you sure we really want this? I could well imagine badly coded applications that don't explicitly specify the required Z depth, but will have artifacts with 16 bits because they need the precision. Cheers, Nicolai On 22.08.2017 16:39, Marek Olšák wrote: From: Marek Olšák--- src/mesa/state_tracker/st_format.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 348853a..7a00a3e 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -1078,23 +1078,23 @@ struct format_mapping DEFAULT_RGBA_FORMATS #define DEFAULT_SRGBA_FORMATS \ PIPE_FORMAT_R8G8B8A8_SRGB, \ PIPE_FORMAT_B8G8R8A8_SRGB, \ PIPE_FORMAT_A8R8G8B8_SRGB, \ PIPE_FORMAT_A8B8G8R8_SRGB, \ 0 #define DEFAULT_DEPTH_FORMATS \ + PIPE_FORMAT_Z16_UNORM, \ PIPE_FORMAT_Z24X8_UNORM, \ PIPE_FORMAT_X8Z24_UNORM, \ - PIPE_FORMAT_Z16_UNORM, \ PIPE_FORMAT_Z24_UNORM_S8_UINT, \ PIPE_FORMAT_S8_UINT_Z24_UNORM, \ 0 #define DEFAULT_SNORM8_RGBA_FORMATS \ PIPE_FORMAT_R8G8B8A8_SNORM, \ 0 #define DEFAULT_UNORM16_RGBA_FORMATS \ PIPE_FORMAT_R16G16B16A16_UNORM, \ -- Lerne, wie die Welt wirklich ist, Aber vergiss niemals, wie sie sein sollte. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheideggerwrote: > I am probably missing something here, but why do you need a new register > file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't > you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing? > Or do you need to know how it's going to be accessed in advance? With bindless, LOAD can take a CONST I believe [which contains the value of the bindless id]. I think it's nice to keep those concepts separate... having CONST sometimes mean the value and other times mean the address is a bit weird. This way CONSTBUF[0] is the address of the 0th constbuf. -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
I am probably missing something here, but why do you need a new register file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing? Or do you need to know how it's going to be accessed in advance? Roland Am 22.08.2017 um 14:14 schrieb Timothy Arceri: > This will be use to distinguish between load types when using > the TGSI_OPCODE_LOAD opcode. > --- > src/gallium/auxiliary/tgsi/tgsi_strings.c | 1 + > src/gallium/include/pipe/p_shader_tokens.h | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c > b/src/gallium/auxiliary/tgsi/tgsi_strings.c > index 7ce12d3655..0872db9ce8 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c > @@ -50,20 +50,21 @@ static const char *tgsi_file_names[] = > "OUT", > "TEMP", > "SAMP", > "ADDR", > "IMM", > "SV", > "IMAGE", > "SVIEW", > "BUFFER", > "MEMORY", > + "CONSTBUF", > }; > > const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = > { > "POSITION", > "COLOR", > "BCOLOR", > "FOG", > "PSIZE", > "GENERIC", > diff --git a/src/gallium/include/pipe/p_shader_tokens.h > b/src/gallium/include/pipe/p_shader_tokens.h > index aa0fb3e3b3..f9cb6183ce 100644 > --- a/src/gallium/include/pipe/p_shader_tokens.h > +++ b/src/gallium/include/pipe/p_shader_tokens.h > @@ -67,20 +67,21 @@ enum tgsi_file_type { > TGSI_FILE_OUTPUT, > TGSI_FILE_TEMPORARY, > TGSI_FILE_SAMPLER, > TGSI_FILE_ADDRESS, > TGSI_FILE_IMMEDIATE, > TGSI_FILE_SYSTEM_VALUE, > TGSI_FILE_IMAGE, > TGSI_FILE_SAMPLER_VIEW, > TGSI_FILE_BUFFER, > TGSI_FILE_MEMORY, > + TGSI_FILE_CONSTBUF, > TGSI_FILE_COUNT, /**< how many TGSI_FILE_ types */ > }; > > > #define TGSI_WRITEMASK_NONE 0x00 > #define TGSI_WRITEMASK_X0x01 > #define TGSI_WRITEMASK_Y0x02 > #define TGSI_WRITEMASK_XY 0x03 > #define TGSI_WRITEMASK_Z0x04 > #define TGSI_WRITEMASK_XZ 0x05 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] isl: Validate row pitch of stencil surfaces.
On Tue, Aug 22, 2017 at 2:07 AM, Andres Gomezwrote: > On Thu, 2017-08-10 at 12:51 -0700, Jason Ekstrand wrote: > > On August 10, 2017 12:45:43 PM Ilia Mirkin wrote: > > > > > On Wed, Aug 9, 2017 at 4:09 PM, Kenneth Graunke > wrote: > > > > Also, silence an obnoxious finishme that started occurring for all > > > > GL applications which use stencil after the i965 ISL conversion. > > > > > > Without commenting on the correctness of the patch, either this or > > > something like it should really end up in 17.2. > > > > > > Cheers, > > > > Agreed > > Would we want it also in 17.1 ? > I wouldn't bother ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] i965/tex: Don't pass samples to miptree_create_for_teximage
On Tue, Aug 22, 2017 at 5:23 AM, Andres Gomezwrote: > Jason, 76e2f390f9863a35 didn't land in 17.1 so I understand this is not > really needed there and we only need it for 17.2. > > WDYT? > This only applies to 17.2 > On Sat, 2017-08-19 at 11:07 -0700, Jason Ekstrand wrote: > > In 76e2f390f9863a35, when Topi switched num_samples from 0 to 1 for > > single-sampled, he accidentally switched the last parameter in the call > > to miptree_create_for_teximage from 0 to 1 thinking it was num_samples > > when it was actually layout_flags. Switching from 0 to 1 added the > > MIPTREE_LAYOUT_ACCELERATED_UPLOAD flag which causes us to allocate a > > busy BO instead of an idle one. This caused the subsequent CPU upload > > to consistently stall. The end result was a 15% performance drop in the > > SynMark v7 DrvRes microbenchmark. This restores the old behavior and > > fixes the performance regression. > > > > Cc: Kenneth Graunke > > Cc: Topi Pohjolainen > > Fixes: 76e2f390f9863a356d1419982dec705260d67eff > > Bugzilla: https://bugs.freedesktop.org/102260 > > Cc: mesa-sta...@lists.freedesktop.org > > --- > > src/mesa/drivers/dri/i965/intel_tex.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/src/mesa/drivers/dri/i965/intel_tex.c > b/src/mesa/drivers/dri/i965/intel_tex.c > > index 890c82d..94a7ad3 100644 > > --- a/src/mesa/drivers/dri/i965/intel_tex.c > > +++ b/src/mesa/drivers/dri/i965/intel_tex.c > > @@ -94,7 +94,7 @@ intel_alloc_texture_image_buffer(struct gl_context > *ctx, > > } else { > >intel_image->mt = intel_miptree_create_for_teximage(brw, > intel_texobj, > >intel_image, > > - 1 /* samples > */); > > + > MIPTREE_CREATE_DEFAULT); > >if (!intel_image->mt) > > return false; > > > -- > Br, > > Andres > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] st/mesa: select Z16 for GL_DEPTH_COMPONENT
From: Marek Olšák--- src/mesa/state_tracker/st_format.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 348853a..7a00a3e 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -1078,23 +1078,23 @@ struct format_mapping DEFAULT_RGBA_FORMATS #define DEFAULT_SRGBA_FORMATS \ PIPE_FORMAT_R8G8B8A8_SRGB, \ PIPE_FORMAT_B8G8R8A8_SRGB, \ PIPE_FORMAT_A8R8G8B8_SRGB, \ PIPE_FORMAT_A8B8G8R8_SRGB, \ 0 #define DEFAULT_DEPTH_FORMATS \ + PIPE_FORMAT_Z16_UNORM, \ PIPE_FORMAT_Z24X8_UNORM, \ PIPE_FORMAT_X8Z24_UNORM, \ - PIPE_FORMAT_Z16_UNORM, \ PIPE_FORMAT_Z24_UNORM_S8_UINT, \ PIPE_FORMAT_S8_UINT_Z24_UNORM, \ 0 #define DEFAULT_SNORM8_RGBA_FORMATS \ PIPE_FORMAT_R8G8B8A8_SNORM, \ 0 #define DEFAULT_UNORM16_RGBA_FORMATS \ PIPE_FORMAT_R16G16B16A16_UNORM, \ -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102318] Mesa3D Scons build - LLVM 5.0 not supported
https://bugs.freedesktop.org/show_bug.cgi?id=102318 Alex Grannichanged: What|Removed |Added Summary|Scons build - LLVM 5.0 not |Mesa3D Scons build - LLVM |supported |5.0 not supported -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/8] gallium: remove TGSI opcode SCS
Am 22.08.2017 um 15:17 schrieb Jose Fonseca: > On 22/08/17 12:28, Marek Olšák wrote: >> On Tue, Aug 22, 2017 at 1:10 PM, Jose Fonseca>> wrote: >>> On 20/08/17 01:49, Marek Olšák wrote: From: Marek Olšák use COS+SIN instead. >>> >>> >>> I don't know if any existing gallium driver leverages that, but it's >>> a basic >>> trigonometric principle that one can easily extract the sin from cos or >>> vice-versa. It requires some extra care for getting the right sign. >>> But >>> the fact is that it should be considerably cheaper to comput both >>> simultaneously than indepdently. >>> >>> Unfortunately GLSL/SPIR-V doesn't allow to express that. D3D9/D3D11 and >>> Metal all do. And from what I've seen from D3D9/D3D11 apps, 99% of the >>> times the shader wants both SIN/COS at the same time. >>> >>> If we want one opcode to rule them all, then a combined SIN+COS seems a >>> better choice IMO. On SM4 the sincos has two outputs: >>> https://msdn.microsoft.com/en-us/library/windows/desktop/hh447234.aspx but >>> >>> they are both optional to use. I don't know if there's a precedent for >>> that. I recall we had similar discussions about UMUL/UMUL_HI, and I >>> suspect >>> we chose not to go that route. >>> >>> >>> Don't GPUs allow to express the computation of both sin/cos with a >>> single >>> opcode? If nothing else there would be a non-negligible impact of >>> leveraging this in llvmpipe at some point. On the other hand, is >>> possible >>> that LLVM common-subexpression elimination optimization passes >>> already do >>> that, so we gain nothing. >>> >>> >>> In short, not big deal either way, but I think it's worth give it a 2nd >>> thought here. >> >> R300 doesn't have trigonometric functions. R500, R600 and later VLIW >> chips, and GCN all only have separate sin and cos. >> >> svga is the only driver that has sincos. No gallium hardware driver >> has that. > > I see. Fair enough. Considering that, plus the fact that GLSL doesn't > have conbined sin/cos, and that LLVM will most likely eliminate common > expressions generated by llvmpipe for cos/sin with same arg, there's > really not a significant upside left to justify keeping this around. > > FWIW the patch is > > Acked-by: Jose Fonseca > > Jose > FWIW old i965 chips had a SINCOS instruction, in addition to SIN and COS (those using the shared mathbox). However, even there the docs state for throughput "The two-output-phase SINCOS function is implemented as back to back SIN and COS functions." So it looks like the execution isn't actually faster there neither, albeit it would be faster due to only having to issue one send message. (But don't ask me how the chips actually implement this...) Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/7 v2] configure.ac: Introduce HAVE_ARM_ASM/HAVE_AARCH64_ASM and the -D flags.
Hi Eric, On 10 August 2017 at 23:43, Eric Anholtwrote: > +aarch64) > +DEFINES="$DEFINES -DUSE_AARCH64_ASM" I cannot see any places where the define is used. Am I missing something or there isn't any? Do you have some WIP patches that make use of it? Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/8] gallium: remove TGSI opcode SCS
On 22/08/17 12:28, Marek Olšák wrote: On Tue, Aug 22, 2017 at 1:10 PM, Jose Fonsecawrote: On 20/08/17 01:49, Marek Olšák wrote: From: Marek Olšák use COS+SIN instead. I don't know if any existing gallium driver leverages that, but it's a basic trigonometric principle that one can easily extract the sin from cos or vice-versa. It requires some extra care for getting the right sign. But the fact is that it should be considerably cheaper to comput both simultaneously than indepdently. Unfortunately GLSL/SPIR-V doesn't allow to express that. D3D9/D3D11 and Metal all do. And from what I've seen from D3D9/D3D11 apps, 99% of the times the shader wants both SIN/COS at the same time. If we want one opcode to rule them all, then a combined SIN+COS seems a better choice IMO. On SM4 the sincos has two outputs: https://msdn.microsoft.com/en-us/library/windows/desktop/hh447234.aspx but they are both optional to use. I don't know if there's a precedent for that. I recall we had similar discussions about UMUL/UMUL_HI, and I suspect we chose not to go that route. Don't GPUs allow to express the computation of both sin/cos with a single opcode? If nothing else there would be a non-negligible impact of leveraging this in llvmpipe at some point. On the other hand, is possible that LLVM common-subexpression elimination optimization passes already do that, so we gain nothing. In short, not big deal either way, but I think it's worth give it a 2nd thought here. R300 doesn't have trigonometric functions. R500, R600 and later VLIW chips, and GCN all only have separate sin and cos. svga is the only driver that has sincos. No gallium hardware driver has that. I see. Fair enough. Considering that, plus the fact that GLSL doesn't have conbined sin/cos, and that LLVM will most likely eliminate common expressions generated by llvmpipe for cos/sin with same arg, there's really not a significant upside left to justify keeping this around. FWIW the patch is Acked-by: Jose Fonseca Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] xmlconfig: use the portable __VA_ARGS__
On 22 August 2017 at 12:07, Eric Engestromwrote: > On Tuesday, 2017-08-22 11:39:35 +0100, Emil Velikov wrote: >> From: Emil Velikov >> >> Follow the example used through mesa and use "..." + "__VA_ARGS__". >> The former tends to be more common and portable. >> >> Signed-off-by: Emil Velikov >> --- >> src/util/xmlconfig.c | 12 ++-- >> 1 file changed, 6 insertions(+), 6 deletions(-) >> >> diff --git a/src/util/xmlconfig.c b/src/util/xmlconfig.c >> index d3f47ecda0c..7d1c524a955 100644 >> --- a/src/util/xmlconfig.c >> +++ b/src/util/xmlconfig.c >> @@ -466,11 +466,11 @@ __driUtilMessage(const char *f, ...) >>(int) XML_GetCurrentLineNumber(data->parser), \ >>(int) XML_GetCurrentColumnNumber(data->parser)); \ >> } while (0) >> -#define XML_WARNING(msg,args...) do { \ >> +#define XML_WARNING(msg, ...) do { \ >> __driUtilMessage ("Warning in %s line %d, column %d: "msg, data->name, \ >>(int) XML_GetCurrentLineNumber(data->parser), \ >>(int) XML_GetCurrentColumnNumber(data->parser), \ >> - args); \ >> + _VA_ARGS__); \ > > Missing underscore here, and these should be `##__VA_ARGS__` if we want > to allow trivial `msg` with no argument (which I assume we do?) > AFAICT we really don't care if we've got the leading ##. __driUtilMessage() uses va_start/va_end which should work in either case. Regardless adding it would be better indeed. > With that, series is > Reviewed-by: Eric Engestrom > Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] i965/tex: Don't pass samples to miptree_create_for_teximage
Jason, 76e2f390f9863a35 didn't land in 17.1 so I understand this is not really needed there and we only need it for 17.2. WDYT? On Sat, 2017-08-19 at 11:07 -0700, Jason Ekstrand wrote: > In 76e2f390f9863a35, when Topi switched num_samples from 0 to 1 for > single-sampled, he accidentally switched the last parameter in the call > to miptree_create_for_teximage from 0 to 1 thinking it was num_samples > when it was actually layout_flags. Switching from 0 to 1 added the > MIPTREE_LAYOUT_ACCELERATED_UPLOAD flag which causes us to allocate a > busy BO instead of an idle one. This caused the subsequent CPU upload > to consistently stall. The end result was a 15% performance drop in the > SynMark v7 DrvRes microbenchmark. This restores the old behavior and > fixes the performance regression. > > Cc: Kenneth Graunke> Cc: Topi Pohjolainen > Fixes: 76e2f390f9863a356d1419982dec705260d67eff > Bugzilla: https://bugs.freedesktop.org/102260 > Cc: mesa-sta...@lists.freedesktop.org > --- > src/mesa/drivers/dri/i965/intel_tex.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_tex.c > b/src/mesa/drivers/dri/i965/intel_tex.c > index 890c82d..94a7ad3 100644 > --- a/src/mesa/drivers/dri/i965/intel_tex.c > +++ b/src/mesa/drivers/dri/i965/intel_tex.c > @@ -94,7 +94,7 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx, > } else { >intel_image->mt = intel_miptree_create_for_teximage(brw, intel_texobj, >intel_image, > - 1 /* samples */); > + > MIPTREE_CREATE_DEFAULT); >if (!intel_image->mt) > return false; > -- Br, Andres ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeonsi/gfx9: add a temporary workaround for a tessellation driver bug
On 22.08.2017 14:10, Nicolai Hähnle wrote: On 22.08.2017 13:00, Marek Olšák wrote: On Tue, Aug 22, 2017 at 9:37 AM, Nicolai Hähnlewrote: On 18.08.2017 19:06, Marek Olšák wrote: Ping. On Wed, Aug 16, 2017 at 12:57 AM, Marek Olšák wrote: From: Marek Olšák The workaround will do for now. The root cause is still unknown. This fixes new piglit: 16in-1out I don't see this test. Did you already send it out? "[PATCH] arb_tessellation_shader: new tests for a radeonsi bug" on the piglit ML. Curious, I can't reproduce the problem on my Raven. VGT_LS_HS_CONFIG.NUM_PATCHES is 16, so there should definitely be more than one wave per thread-group. >> Marek -- Lerne, wie die Welt wirklich ist, Aber vergiss niemals, wie sie sein sollte. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/7] radeonsi: enable STD430 packing of UBOs by default
Before this change we were defaulting to STD140 which is slightly less efficient at packing arrays. --- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/mesa/state_tracker/st_context.c| 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 1d1db02c76..b41a6f5ce7 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -522,20 +522,21 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: case PIPE_CAP_DOUBLES: case PIPE_CAP_TGSI_TEX_TXF_LZ: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: case PIPE_CAP_BINDLESS_TEXTURE: case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_QUERY_TIME_ELAPSED: case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: case PIPE_CAP_QUERY_SO_OVERFLOW: case PIPE_CAP_MEMOBJ: + case PIPE_CAP_LOAD_CONSTBUF: return 1; case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_TGSI_CLOCK: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: return 1; case PIPE_CAP_TGSI_VOTE: @@ -611,21 +612,20 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: case PIPE_CAP_MAX_WINDOW_RECTANGLES: case PIPE_CAP_NATIVE_FENCE_FD: case PIPE_CAP_TGSI_FS_FBFETCH: case PIPE_CAP_TGSI_MUL_ZERO_WINS: case PIPE_CAP_UMA: case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: case PIPE_CAP_POST_DEPTH_COVERAGE: - case PIPE_CAP_LOAD_CONSTBUF: return 0; case PIPE_CAP_QUERY_BUFFER_OBJECT: return si_have_tgsi_compute(sscreen); case PIPE_CAP_DRAW_PARAMETERS: case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: return sscreen->has_draw_indirect_multi; diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index ef2e73e741..bd990fc34a 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -367,20 +367,23 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, /* Need these flags: */ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; if (no_error) ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR; + ctx->Const.UseSTD430AsDefaultPacking = + screen->get_param(screen, PIPE_CAP_LOAD_CONSTBUF); + st->has_stencil_export = screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT); st->has_shader_model3 = screen->get_param(screen, PIPE_CAP_SM3); st->has_etc1 = screen->is_format_supported(screen, PIPE_FORMAT_ETC1_RGB8, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); st->has_etc2 = screen->is_format_supported(screen, PIPE_FORMAT_ETC2_RGB8, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); st->prefer_blit_based_texture_transfer = screen->get_param(screen, -- 2.13.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/7] mesa: pass ctx to add_uniform_to_shader constructor
Fixes: 4c2422067b5c ("glsl: pass UseSTD430AsDefaultPacking to where it will be used") --- src/mesa/program/ir_to_mesa.cpp| 10 ++ src/mesa/program/ir_to_mesa.h | 3 ++- src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index c168162b57..78f2449878 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2409,21 +2409,22 @@ print_program(struct prog_instruction *mesa_instructions, indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, PROG_PRINT_DEBUG, NULL); } } namespace { class add_uniform_to_shader : public program_resource_visitor { public: - add_uniform_to_shader(struct gl_shader_program *shader_program, + add_uniform_to_shader(struct gl_context *ctx, + struct gl_shader_program *shader_program, struct gl_program_parameter_list *params) : ctx(ctx), params(params), idx(-1) { /* empty */ } void process(ir_variable *var) { this->idx = -1; this->var = var; @@ -2475,27 +2476,28 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, /** * Generate the program parameters list for the user uniforms in a shader * * \param shader_program Linked shader program. This is only used to * emit possible link errors to the info log. * \param sh Shader whose uniforms are to be processed. * \param params Parameter list to be filled in. */ void -_mesa_generate_parameters_list_for_uniforms(struct gl_shader_program +_mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx, +struct gl_shader_program *shader_program, struct gl_linked_shader *sh, struct gl_program_parameter_list *params) { - add_uniform_to_shader add(shader_program, params); + add_uniform_to_shader add(ctx, shader_program, params); foreach_in_list(ir_instruction, node, sh->ir) { ir_variable *var = node->as_variable(); if ((var == NULL) || (var->data.mode != ir_var_uniform) || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0)) continue; add.process(var); } @@ -2843,21 +2845,21 @@ get_mesa_program(struct gl_context *ctx, validate_ir_tree(shader->ir); prog = shader->Program; prog->Parameters = _mesa_new_parameter_list(); v.ctx = ctx; v.prog = prog; v.shader_program = shader_program; v.options = options; - _mesa_generate_parameters_list_for_uniforms(shader_program, shader, + _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader, prog->Parameters); /* Emit Mesa IR for main(). */ visit_exec_list(shader->ir, ); v.emit(NULL, OPCODE_END); prog->arb.NumTemporaries = v.next_temp; unsigned num_instructions = v.instructions.length(); diff --git a/src/mesa/program/ir_to_mesa.h b/src/mesa/program/ir_to_mesa.h index e3d364455c..9714f50443 100644 --- a/src/mesa/program/ir_to_mesa.h +++ b/src/mesa/program/ir_to_mesa.h @@ -31,21 +31,22 @@ extern "C" { #endif struct gl_context; struct gl_shader; struct gl_shader_program; void _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); GLboolean _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); void -_mesa_generate_parameters_list_for_uniforms(struct gl_shader_program +_mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx, +struct gl_shader_program *shader_program, struct gl_linked_shader *sh, struct gl_program_parameter_list *params); void _mesa_associate_uniform_storage(struct gl_context *ctx, struct gl_shader_program *shader_program, struct gl_program *prog, bool propagate_to_storage); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index a2522070b1..38d6c7068c 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -393,21 +393,21 @@ st_nir_get_mesa_program(struct gl_context *ctx, validate_ir_tree(shader->ir); prog = shader->Program; prog->Parameters = _mesa_new_parameter_list();
[Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
This will be use to distinguish between load types when using the TGSI_OPCODE_LOAD opcode. --- src/gallium/auxiliary/tgsi/tgsi_strings.c | 1 + src/gallium/include/pipe/p_shader_tokens.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index 7ce12d3655..0872db9ce8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -50,20 +50,21 @@ static const char *tgsi_file_names[] = "OUT", "TEMP", "SAMP", "ADDR", "IMM", "SV", "IMAGE", "SVIEW", "BUFFER", "MEMORY", + "CONSTBUF", }; const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = { "POSITION", "COLOR", "BCOLOR", "FOG", "PSIZE", "GENERIC", diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index aa0fb3e3b3..f9cb6183ce 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -67,20 +67,21 @@ enum tgsi_file_type { TGSI_FILE_OUTPUT, TGSI_FILE_TEMPORARY, TGSI_FILE_SAMPLER, TGSI_FILE_ADDRESS, TGSI_FILE_IMMEDIATE, TGSI_FILE_SYSTEM_VALUE, TGSI_FILE_IMAGE, TGSI_FILE_SAMPLER_VIEW, TGSI_FILE_BUFFER, TGSI_FILE_MEMORY, + TGSI_FILE_CONSTBUF, TGSI_FILE_COUNT, /**< how many TGSI_FILE_ types */ }; #define TGSI_WRITEMASK_NONE 0x00 #define TGSI_WRITEMASK_X0x01 #define TGSI_WRITEMASK_Y0x02 #define TGSI_WRITEMASK_XY 0x03 #define TGSI_WRITEMASK_Z0x04 #define TGSI_WRITEMASK_XZ 0x05 -- 2.13.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/7] gallium: introduce PIPE_CAP_LOAD_CONSTBUF
--- src/gallium/docs/source/screen.rst | 2 ++ src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 + src/gallium/drivers/freedreno/freedreno_screen.c | 1 + src/gallium/drivers/i915/i915_screen.c | 1 + src/gallium/drivers/llvmpipe/lp_screen.c | 1 + src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/r300/r300_screen.c | 1 + src/gallium/drivers/r600/r600_pipe.c | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 1 + src/gallium/drivers/softpipe/sp_screen.c | 1 + src/gallium/drivers/svga/svga_screen.c | 1 + src/gallium/drivers/swr/swr_screen.cpp | 1 + src/gallium/drivers/vc4/vc4_screen.c | 1 + src/gallium/drivers/virgl/virgl_screen.c | 1 + src/gallium/include/pipe/p_defines.h | 1 + 17 files changed, 18 insertions(+) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index be14ddd0c0..0cfe20ec9b 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -397,20 +397,22 @@ The integer capabilities: * ``PIPE_CAP_BINDLESS_TEXTURE``: Whether bindless texture operations are supported. * ``PIPE_CAP_NIR_SAMPLERS_AS_DEREF``: Whether NIR tex instructions should reference texture and sampler as NIR derefs instead of by indices. * ``PIPE_CAP_QUERY_SO_OVERFLOW``: Whether the ``PIPE_QUERY_SO_OVERFLOW_PREDICATE`` and ``PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE`` query types are supported. Note that for a driver that does not support multiple output streams (i.e., ``PIPE_CAP_MAX_VERTEX_STREAMS`` is 1), both query types are identical. * ``PIPE_CAP_MEMOBJ``: Whether operations on memory objects are supported. +* ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports TGSI_OPCODE_LOAD use + with constant buffers. .. _pipe_capf: PIPE_CAPF_* The floating-point capabilities are: * ``PIPE_CAPF_MAX_LINE_WIDTH``: The maximum width of a regular line. diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c index f400e423de..49c700a3d0 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c @@ -256,20 +256,21 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: case PIPE_CAP_TGSI_BALLOT: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_POST_DEPTH_COVERAGE: case PIPE_CAP_BINDLESS_TEXTURE: case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: case PIPE_CAP_QUERY_SO_OVERFLOW: case PIPE_CAP_MEMOBJ: + case PIPE_CAP_LOAD_CONSTBUF: return 0; /* Stream output. */ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: return 0; /* Geometry shader output, unsupported. */ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index b26f67e4e2..425d967015 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -317,20 +317,21 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: case PIPE_CAP_TGSI_BALLOT: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_POST_DEPTH_COVERAGE: case PIPE_CAP_BINDLESS_TEXTURE: case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: case PIPE_CAP_QUERY_SO_OVERFLOW: case PIPE_CAP_MEMOBJ: + case PIPE_CAP_LOAD_CONSTBUF: return 0; case PIPE_CAP_MAX_VIEWPORTS: return 1; case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: /* manage the variants for these ourself, to avoid breaking precompile: */ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index e700e294da..749dc9cb53 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -306,20 +306,21 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_TGSI_CLOCK: case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: case PIPE_CAP_TGSI_BALLOT: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case
[Mesa-dev] [PATCH 2/7] mesa/st: create add_buffer_to_load_and_stores() helper
Will be used to add LOAD support to UBOs. Reviewed-by: Marek Olšák--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 46 ++ 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 9688400ed4..f77c85a944 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1240,20 +1240,46 @@ attrib_type_size(const struct glsl_type *type, bool is_vs_input) { return type->count_attribute_slots(is_vs_input); } static int type_size(const struct glsl_type *type) { return type->count_attribute_slots(false); } +static void +add_buffer_to_load_and_stores(glsl_to_tgsi_instruction *inst, st_src_reg *buf, + exec_list *instructions, ir_constant *access) +{ + /** +* emit_asm() might have actually split the op into pieces, e.g. for +* double stores. We have to go back and fix up all the generated ops. +*/ + unsigned op = inst->op; + do { + inst->resource = *buf; + if (access) + inst->buffer_access = access->value.u[0]; + + if (inst == instructions->get_head_raw()) + break; + inst = (glsl_to_tgsi_instruction *)inst->get_prev(); + + if (inst->op == TGSI_OPCODE_UADD) { + if (inst == instructions->get_head_raw()) +break; + inst = (glsl_to_tgsi_instruction *)inst->get_prev(); + } + } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED); +} + /** * If the given GLSL type is an array or matrix or a structure containing * an array/matrix member, return true. Else return false. * * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY * or PROGRAM_ARRAY) should be used for variables of this type. Anytime * we have an array that might be indexed with a variable, we need to use * the later storage type. */ static bool @@ -3635,39 +3661,21 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) inst = emit_asm(ir, opcode, dst, off, data, data2); } param = param->get_next(); ir_constant *access = NULL; if (!param->is_tail_sentinel()) { access = ((ir_instruction *)param)->as_constant(); assert(access); } - /* The emit_asm() might have actually split the op into pieces, e.g. for -* double stores. We have to go back and fix up all the generated ops. -*/ - unsigned op = inst->op; - do { - inst->resource = buffer; - if (access) - inst->buffer_access = access->value.u[0]; - - if (inst == this->instructions.get_head_raw()) - break; - inst = (glsl_to_tgsi_instruction *)inst->get_prev(); - - if (inst->op == TGSI_OPCODE_UADD) { - if (inst == this->instructions.get_head_raw()) -break; - inst = (glsl_to_tgsi_instruction *)inst->get_prev(); - } - } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED); + add_buffer_to_load_and_stores(inst, , >instructions, access); } void glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir) { switch (ir->callee->intrinsic_id) { case ir_intrinsic_memory_barrier: emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER | -- 2.13.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/7] radeonsi: make use of LOAD for UBOs
v2: always set can_speculate and allow_smem to true --- src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 31 +++ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index f8c99ff7e7..83cd8cd938 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -66,32 +66,36 @@ static LLVMValueRef get_buffer_size( LLVMConstInt(ctx->i32, 0x3FFF, 0), ""); size = LLVMBuildUDiv(builder, size, stride, ""); } return size; } static LLVMValueRef shader_buffer_fetch_rsrc(struct si_shader_context *ctx, -const struct tgsi_full_src_register *reg) +const struct tgsi_full_src_register *reg, +bool ubo) { LLVMValueRef index; if (!reg->Register.Indirect) { index = LLVMConstInt(ctx->i32, reg->Register.Index, false); } else { index = si_get_indirect_index(ctx, >Indirect, reg->Register.Index); } - return ctx->abi.load_ssbo(>abi, index, false); + if (ubo) + return ctx->abi.load_ubo(>abi, index); + else + return ctx->abi.load_ssbo(>abi, index, false); } static bool tgsi_is_array_sampler(unsigned target) { return target == TGSI_TEXTURE_1D_ARRAY || target == TGSI_TEXTURE_SHADOW1D_ARRAY || target == TGSI_TEXTURE_2D_ARRAY || target == TGSI_TEXTURE_SHADOW2D_ARRAY || target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY || @@ -356,26 +360,28 @@ static void load_fetch_args( struct lp_build_emit_data * emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = >gallivm; const struct tgsi_full_instruction * inst = emit_data->inst; unsigned target = inst->Memory.Texture; LLVMValueRef rsrc; emit_data->dst_type = ctx->v4f32; - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || + inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef offset; LLVMValueRef tmp; - rsrc = shader_buffer_fetch_rsrc(ctx, >Src[0]); + bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF; + rsrc = shader_buffer_fetch_rsrc(ctx, >Src[0], ubo); tmp = lp_build_emit_fetch(bld_base, inst, 1, 0); offset = LLVMBuildBitCast(builder, tmp, ctx->i32, ""); buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0, offset, false, false); } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE || tgsi_is_bindless_image_file(inst->Src[0].Register.File)) { LLVMValueRef coords; @@ -407,21 +413,21 @@ static unsigned get_load_intr_attribs(bool can_speculate) static unsigned get_store_intr_attribs(bool writeonly_memory) { return writeonly_memory && HAVE_LLVM >= 0x0400 ? LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY : LP_FUNC_ATTR_WRITEONLY; } static void load_emit_buffer(struct si_shader_context *ctx, struct lp_build_emit_data *emit_data, -bool can_speculate) +bool can_speculate, bool allow_smem) { const struct tgsi_full_instruction *inst = emit_data->inst; uint writemask = inst->Dst[0].Register.WriteMask; uint count = util_last_bit(writemask); LLVMValueRef *args = emit_data->args; /* Don't use SMEM for shader buffer loads, because LLVM doesn't * select SMEM for SI.load.const with a non-constant offset, and * constant offsets practically don't exist with shader buffers. * @@ -434,21 +440,21 @@ static void load_emit_buffer(struct si_shader_context *ctx, * After that, si_memory_barrier should invalidate sL1 for shader * buffers. */ assert(LLVMConstIntGetZExtValue(args[1]) == 0); /* vindex */ emit_data->output[emit_data->chan] = ac_build_buffer_load(>ac, args[0], count, NULL, args[2], NULL, 0, LLVMConstIntGetZExtValue(args[3]), LLVMConstIntGetZExtValue(args[4]), -can_speculate, false); +can_speculate, allow_smem); } static
[Mesa-dev] V2 radeonsi use STD430 packing of UBOs by default
I'm a little unsure what to do with this now. Below is my shader-db results, the majority of negative changes are from Natural Selection 2. I looked at some dumps of the worst Natural Selection 2 shaders and it seems to just be scheduling differences causing the regressions. I tested with sisched but that just made things even worse. Obviously we should be aiming to improve the schedulare, but since this regresses things and I have no evidence of it helping anything it makes the case for adding it pretty weak. Thoughts?? PERCENTAGE DELTASShaders SGPRs VGPRs SpillSGPR MaxWaves All affected57972.92 3.05 %5.04 % -2.94 --- Total 722870.28 %0.34 %0.33 % -0.21 % ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/7] mesa/st: add LOAD support for UBOs
This will allow us to use STD430 packing by default if the driver supports it. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 198 + 1 file changed, 119 insertions(+), 79 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f77c85a944..9264d18a3f 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2177,105 +2177,140 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) case ir_binop_bit_or: if (native_integers) { emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); break; } assert(!"GLSL 1.30 features unsupported"); break; case ir_binop_ubo_load: { - ir_constant *const_uniform_block = ir->operands[0]->as_constant(); - ir_constant *const_offset_ir = ir->operands[1]->as_constant(); - unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0; - unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 1; - st_src_reg index_reg = get_temp(glsl_type::uint_type); - st_src_reg cbuf; - - cbuf.type = ir->type->base_type; - cbuf.file = PROGRAM_CONSTANT; - cbuf.index = 0; - cbuf.reladdr = NULL; - cbuf.negate = 0; - cbuf.abs = 0; - cbuf.index2D = const_block; - - assert(ir->type->is_vector() || ir->type->is_scalar()); - - if (const_offset_ir) { - /* Constant index into constant buffer */ - cbuf.reladdr = NULL; - cbuf.index = const_offset / 16; - } - else { - ir_expression *offset_expr = ir->operands[1]->as_expression(); - st_src_reg offset = op[1]; - - /* The OpenGL spec is written in such a way that accesses with - * non-constant offset are almost always vec4-aligned. The only - * exception to this are members of structs in arrays of structs: - * each struct in an array of structs is at least vec4-aligned, - * but single-element and [ui]vec2 members of the struct may be at - * an offset that is not a multiple of 16 bytes. - * - * Here, we extract that offset, relying on previous passes to always - * generate offset expressions of the form (+ expr constant_offset). - * - * Note that the std430 layout, which allows more cases of alignment - * less than vec4 in arrays, is not supported for uniform blocks, so - * we do not have to deal with it here. - */ - if (offset_expr && offset_expr->operation == ir_binop_add) { -const_offset_ir = offset_expr->operands[1]->as_constant(); -if (const_offset_ir) { - const_offset = const_offset_ir->value.u[0]; - cbuf.index = const_offset / 16; - offset_expr->operands[0]->accept(this); - offset = this->result; -} - } + if (ctx->Const.UseSTD430AsDefaultPacking) { + ir_rvalue *block = ir->operands[0]; + ir_rvalue *offset = ir->operands[1]; + ir_constant *const_block = block->as_constant(); - /* Relative/variable index into constant buffer */ - emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset, - st_src_reg_for_int(4)); - cbuf.reladdr = ralloc(mem_ctx, st_src_reg); - memcpy(cbuf.reladdr, _reg, sizeof(index_reg)); - } + st_src_reg cbuf(PROGRAM_CONSTANT, +(const_block ? const_block->value.u[0] + 1 : 1), +ir->type->base_type); - if (const_uniform_block) { - /* Constant constant buffer */ - cbuf.reladdr2 = NULL; cbuf.has_index2 = true; - } - else { - /* Relative/variable constant buffer */ - cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg); - memcpy(cbuf.reladdr2, [0], sizeof(st_src_reg)); - cbuf.has_index2 = true; - } - cbuf.swizzle = swizzle_for_size(ir->type->vector_elements); - if (glsl_base_type_is_64bit(cbuf.type)) - cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8, - const_offset % 16 / 8, - const_offset % 16 / 8, - const_offset % 16 / 8); - else - cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4); + if (!const_block) { +block->accept(this); +cbuf.reladdr = ralloc(mem_ctx, st_src_reg); +*cbuf.reladdr = this->result; +emit_arl(ir, sampler_reladdr, this->result); + } + + /* Calculate the surface offset */ + offset->accept(this); +
Re: [Mesa-dev] [PATCH] radeonsi/gfx9: add a temporary workaround for a tessellation driver bug
On 22.08.2017 13:00, Marek Olšák wrote: On Tue, Aug 22, 2017 at 9:37 AM, Nicolai Hähnlewrote: On 18.08.2017 19:06, Marek Olšák wrote: Ping. On Wed, Aug 16, 2017 at 12:57 AM, Marek Olšák wrote: From: Marek Olšák The workaround will do for now. The root cause is still unknown. This fixes new piglit: 16in-1out I don't see this test. Did you already send it out? "[PATCH] arb_tessellation_shader: new tests for a radeonsi bug" on the piglit ML. Curious, I can't reproduce the problem on my Raven. Marek -- Lerne, wie die Welt wirklich ist, Aber vergiss niemals, wie sie sein sollte. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 03/11] glsl: pass UseSTD430AsDefaultPacking to where it will be used
On 22/08/17 18:36, Nicolai Hähnle wrote: On 17.08.2017 13:03, Timothy Arceri wrote: diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 0e6a95ce99..d04ea67b07 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2411,39 +2411,41 @@ print_program(struct prog_instruction *mesa_instructions, PROG_PRINT_DEBUG, NULL); } } namespace { class add_uniform_to_shader : public program_resource_visitor { public: add_uniform_to_shader(struct gl_shader_program *shader_program, struct gl_program_parameter_list *params) - : shader_program(shader_program), params(params), idx(-1) + : ctx(ctx), shader_program(shader_program), params(params), idx(-1) Looks like you pushed a version of this with a problem: ../../../mesa/src/mesa/program/ir_to_mesa.cpp: In constructor ‘{anonymous}::add_uniform_to_shader::add_uniform_to_shader(gl_shader_program*, gl_program_parameter_list*)’: ../../../mesa/src/mesa/program/ir_to_mesa.cpp:2419:4: warning: ‘{anonymous}::add_uniform_to_shader::ctx’ is initialized with itself [-Winit-self] add_uniform_to_shader(struct gl_shader_program *shader_program, > ^ Thanks, not sure how I missed this, possibly because builds are becoming increasingly noise with unfixed warnings. Anyway fix pushed. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] st/glsl_to_tgsi: fix getting the image type for array of structs (again)
On 08/22/2017 12:54 PM, Timothy Arceri wrote: On 22/08/17 20:34, Samuel Pitoiset wrote: We want the type of the field, not of the struct. This fixes a regression in the following piglit test: arb_bindless_texture/compiler/images/arrays-of-struct.frag This is the second time this test is broken, please run piglit with assertions enabled next time. Well we should probably be catching this with a test rather then relying on an assert. Anyway thanks for catching and fixing it :) Yeah, probably something like that. :) Please remove the above sentence before committing. In case your not aware you can use --annotate when sending and put this sort of thing under the --- below rather than the commit message. Reviewed-by: Timothy ArceriFixes: 49d9286a3f ("glsl: stop copying struct and interface member names") Signed-off-by: Samuel Pitoiset --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 9688400ed4..2f84d2c046 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3796,12 +3796,10 @@ get_image_qualifiers(ir_dereference *ir, const glsl_type **type, switch (ir->ir_type) { case ir_type_dereference_record: { ir_dereference_record *deref_record = ir->as_dereference_record(); - - *type = deref_record->type; - - const glsl_type *struct_type = - deref_record->record->type->without_array(); + const glsl_type *struct_type = deref_record->record->type; int fild_idx = deref_record->field_idx; + + *type = struct_type->fields.structure[fild_idx].type->without_array(); *memory_coherent = struct_type->fields.structure[fild_idx].memory_coherent; *memory_volatile = ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev