I like the general idea, we just shouldn't rely too much on the type size later on, especially in regards to CL where we can have unaligned load/stores especially for packed structs.
Acked-by: Karol Herbst <kher...@redhat.com> On Wed, Nov 14, 2018 at 12:24 AM Jason Ekstrand <ja...@jlekstrand.net> wrote: > > This also changes spirv_to_nir and glsl_to_nir to set them. The one > place that doesn't set them is shared memory access lowering in > nir_lower_io. That will have to be updated before any consumers of it > can effectively use these new alignments. > --- > src/compiler/glsl/glsl_to_nir.cpp | 14 +++++++ > src/compiler/nir/nir.h | 41 ++++++++++++++++++++ > src/compiler/nir/nir_intrinsics.py | 26 ++++++++----- > src/compiler/nir/nir_lower_atomics_to_ssbo.c | 4 ++ > src/compiler/nir/nir_print.c | 2 + > src/compiler/spirv/spirv_to_nir.c | 2 + > src/compiler/spirv/vtn_variables.c | 6 +++ > 7 files changed, 85 insertions(+), 10 deletions(-) > > diff --git a/src/compiler/glsl/glsl_to_nir.cpp > b/src/compiler/glsl/glsl_to_nir.cpp > index 9bb0f5d4044..9f73b721e39 100644 > --- a/src/compiler/glsl/glsl_to_nir.cpp > +++ b/src/compiler/glsl/glsl_to_nir.cpp > @@ -33,6 +33,7 @@ > #include "compiler/nir/nir_builder.h" > #include "main/imports.h" > #include "main/mtypes.h" > +#include "util/u_math.h" > > /* > * pass to lower GLSL IR to NIR > @@ -603,6 +604,14 @@ nir_visitor::visit(ir_return *ir) > nir_builder_instr_insert(&b, &instr->instr); > } > > +static void > +intrinsic_set_std430_align(nir_intrinsic_instr *intrin, const glsl_type > *type) > +{ > + unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type); > + unsigned pow2_components = util_next_power_of_two(type->vector_elements); > + nir_intrinsic_set_align(intrin, (bit_size / 8) * pow2_components, 0); > +} > + > void > nir_visitor::visit(ir_call *ir) > { > @@ -1006,6 +1015,7 @@ nir_visitor::visit(ir_call *ir) > instr->src[0] = nir_src_for_ssa(nir_val); > instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); > instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); > + intrinsic_set_std430_align(instr, val->type); > nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]); > instr->num_components = val->type->vector_elements; > > @@ -1024,6 +1034,7 @@ nir_visitor::visit(ir_call *ir) > > const glsl_type *type = ir->return_deref->var->type; > instr->num_components = type->vector_elements; > + intrinsic_set_std430_align(instr, type); > > /* Setup destination register */ > unsigned bit_size = type->is_boolean() ? 32 : > glsl_get_bit_size(type); > @@ -1101,6 +1112,7 @@ nir_visitor::visit(ir_call *ir) > > const glsl_type *type = ir->return_deref->var->type; > instr->num_components = type->vector_elements; > + intrinsic_set_std430_align(instr, type); > > /* Setup destination register */ > unsigned bit_size = type->is_boolean() ? 32 : > glsl_get_bit_size(type); > @@ -1131,6 +1143,7 @@ nir_visitor::visit(ir_call *ir) > > instr->src[0] = nir_src_for_ssa(nir_val); > instr->num_components = val->type->vector_elements; > + intrinsic_set_std430_align(instr, val->type); > > nir_builder_instr_insert(&b, &instr->instr); > break; > @@ -1388,6 +1401,7 @@ nir_visitor::visit(ir_expression *ir) > load->num_components = ir->type->vector_elements; > load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); > load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); > + intrinsic_set_std430_align(load, ir->type); > add_instr(&load->instr, ir->type->vector_elements, bit_size); > > /* > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index c469e111b2c..41d61dc8105 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -34,6 +34,7 @@ > #include "util/list.h" > #include "util/ralloc.h" > #include "util/set.h" > +#include "util/bitscan.h" > #include "util/bitset.h" > #include "util/macros.h" > #include "compiler/nir_types.h" > @@ -1248,6 +1249,18 @@ typedef enum { > */ > NIR_INTRINSIC_ACCESS = 16, > > + /** > + * Alignment for offsets and addresses > + * > + * These two parameters, specify an alignment in terms of a multiplier and > + * an offset. The offset or address parameter X of the intrinsic is > + * guaranteed to satisfy the following: > + * > + * (X - align_offset) % align_mul == 0 > + */ > + NIR_INTRINSIC_ALIGN_MUL = 17, > + NIR_INTRINSIC_ALIGN_OFFSET = 18, > + > NIR_INTRINSIC_NUM_INDEX_FLAGS, > > } nir_intrinsic_index_flag; > @@ -1342,6 +1355,34 @@ INTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum > glsl_sampler_dim) > INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool) > INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier) > INTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned) > +INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned) > +INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned) > + > +static inline void > +nir_intrinsic_set_align(nir_intrinsic_instr *intrin, > + unsigned align_mul, unsigned align_offset) > +{ > + assert(util_is_power_of_two_nonzero(align_mul)); > + assert(align_offset < align_mul); > + nir_intrinsic_set_align_mul(intrin, align_mul); > + nir_intrinsic_set_align_offset(intrin, align_offset); > +} > + > +/** Returns a simple alignment for a load/store intrinsic offset > + * > + * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL > + * and ALIGN_OFFSET parameters, this helper takes both into account and > + * provides a single simple alignment parameter. The offset X is guaranteed > + * to satisfy X % align == 0. > + */ > +static inline unsigned > +nir_intrinsic_align(nir_intrinsic_instr *intrin) > +{ > + const unsigned align_mul = nir_intrinsic_align_mul(intrin); > + const unsigned align_offset = nir_intrinsic_align_offset(intrin); > + assert(align_offset < align_mul); > + return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; > +} > > /** > * \group texture information > diff --git a/src/compiler/nir/nir_intrinsics.py > b/src/compiler/nir/nir_intrinsics.py > index ec3049ca06d..735db43d16a 100644 > --- a/src/compiler/nir/nir_intrinsics.py > +++ b/src/compiler/nir/nir_intrinsics.py > @@ -109,6 +109,9 @@ IMAGE_ARRAY = "NIR_INTRINSIC_IMAGE_ARRAY" > ACCESS = "NIR_INTRINSIC_ACCESS" > # Image format for image intrinsics > FORMAT = "NIR_INTRINSIC_FORMAT" > +# Offset or address alignment > +ALIGN_MUL = "NIR_INTRINSIC_ALIGN_MUL" > +ALIGN_OFFSET = "NIR_INTRINSIC_ALIGN_OFFSET" > > # > # Possible flags: > @@ -554,8 +557,8 @@ def load(name, num_srcs, indices=[], flags=[]): > > # src[] = { offset }. const_index[] = { base, range } > load("uniform", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER]) > -# src[] = { buffer_index, offset }. No const_index > -load("ubo", 2, flags=[CAN_ELIMINATE, CAN_REORDER]) > +# src[] = { buffer_index, offset }. const_index[] = { align_mul, > align_offset } > +load("ubo", 2, [ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE, CAN_REORDER]) > # src[] = { offset }. const_index[] = { base, component } > load("input", 1, [BASE, COMPONENT], [CAN_ELIMINATE, CAN_REORDER]) > # src[] = { vertex, offset }. const_index[] = { base, component } > @@ -564,14 +567,15 @@ load("per_vertex_input", 2, [BASE, COMPONENT], > [CAN_ELIMINATE, CAN_REORDER]) > intrinsic("load_interpolated_input", src_comp=[2, 1], dest_comp=0, > indices=[BASE, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER]) > > -# src[] = { buffer_index, offset }. No const_index > -load("ssbo", 2, flags=[CAN_ELIMINATE], indices=[ACCESS]) > +# src[] = { buffer_index, offset }. > +# const_index[] = { access, align_mul, align_offset } > +load("ssbo", 2, [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) > # src[] = { offset }. const_index[] = { base, component } > load("output", 1, [BASE, COMPONENT], flags=[CAN_ELIMINATE]) > # src[] = { vertex, offset }. const_index[] = { base } > load("per_vertex_output", 2, [BASE, COMPONENT], [CAN_ELIMINATE]) > -# src[] = { offset }. const_index[] = { base } > -load("shared", 1, [BASE], [CAN_ELIMINATE]) > +# src[] = { offset }. const_index[] = { base, align_mul, align_offset } > +load("shared", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) > # src[] = { offset }. const_index[] = { base, range } > load("push_constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER]) > # src[] = { offset }. const_index[] = { base, range } > @@ -590,7 +594,9 @@ store("output", 2, [BASE, WRMASK, COMPONENT]) > # src[] = { value, vertex, offset }. > # const_index[] = { base, write_mask, component } > store("per_vertex_output", 3, [BASE, WRMASK, COMPONENT]) > -# src[] = { value, block_index, offset }. const_index[] = { write_mask } > -store("ssbo", 3, [WRMASK, ACCESS]) > -# src[] = { value, offset }. const_index[] = { base, write_mask } > -store("shared", 2, [BASE, WRMASK]) > +# src[] = { value, block_index, offset } > +# const_index[] = { write_mask, access, align_mul, align_offset } > +store("ssbo", 3, [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) > +# src[] = { value, offset }. > +# const_index[] = { base, write_mask, align_mul, align_offset } > +store("shared", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET]) > diff --git a/src/compiler/nir/nir_lower_atomics_to_ssbo.c > b/src/compiler/nir/nir_lower_atomics_to_ssbo.c > index c165a03b141..cdc660981fc 100644 > --- a/src/compiler/nir/nir_lower_atomics_to_ssbo.c > +++ b/src/compiler/nir/nir_lower_atomics_to_ssbo.c > @@ -149,6 +149,10 @@ lower_instr(nir_intrinsic_instr *instr, unsigned > ssbo_offset, nir_builder *b) > break; > } > > + if (new_instr->intrinsic == nir_intrinsic_load_ssbo || > + new_instr->intrinsic == nir_intrinsic_store_ssbo) > + nir_intrinsic_set_align(new_instr, 4, 0); > + > nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, > instr->dest.ssa.num_components, > instr->dest.ssa.bit_size, NULL); > diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c > index ab3d5115688..e20c28fec87 100644 > --- a/src/compiler/nir/nir_print.c > +++ b/src/compiler/nir/nir_print.c > @@ -691,6 +691,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, > print_state *state) > [NIR_INTRINSIC_IMAGE_ARRAY] = "image_array", > [NIR_INTRINSIC_ACCESS] = "access", > [NIR_INTRINSIC_FORMAT] = "format", > + [NIR_INTRINSIC_ALIGN_MUL] = "align_mul", > + [NIR_INTRINSIC_ALIGN_OFFSET] = "align_offset", > }; > for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) { > if (!info->index_map[idx]) > diff --git a/src/compiler/spirv/spirv_to_nir.c > b/src/compiler/spirv/spirv_to_nir.c > index 96ff09c3659..11a283ccf8f 100644 > --- a/src/compiler/spirv/spirv_to_nir.c > +++ b/src/compiler/spirv/spirv_to_nir.c > @@ -2742,6 +2742,7 @@ vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode, > switch (opcode) { > case SpvOpAtomicLoad: > atomic->num_components = glsl_get_vector_elements(ptr->type->type); > + nir_intrinsic_set_align(atomic, 4, 0); > if (ptr->mode == vtn_variable_mode_ssbo) > atomic->src[src++] = nir_src_for_ssa(index); > atomic->src[src++] = nir_src_for_ssa(offset); > @@ -2750,6 +2751,7 @@ vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode, > case SpvOpAtomicStore: > atomic->num_components = glsl_get_vector_elements(ptr->type->type); > nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) > - 1); > + nir_intrinsic_set_align(atomic, 4, 0); > atomic->src[src++] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def); > if (ptr->mode == vtn_variable_mode_ssbo) > atomic->src[src++] = nir_src_for_ssa(index); > diff --git a/src/compiler/spirv/vtn_variables.c > b/src/compiler/spirv/vtn_variables.c > index e1e2c8c26ba..53f52255dc3 100644 > --- a/src/compiler/spirv/vtn_variables.c > +++ b/src/compiler/spirv/vtn_variables.c > @@ -646,6 +646,12 @@ _vtn_load_store_tail(struct vtn_builder *b, > nir_intrinsic_op op, bool load, > nir_intrinsic_set_access(instr, access); > } > > + /* With extensions like relaxed_block_layout, we really can't guarantee > + * much more than scalar alignment. > + */ > + if (op != nir_intrinsic_load_push_constant) > + nir_intrinsic_set_align(instr, data_bit_size / 8, 0); > + > if (index) > instr->src[src++] = nir_src_for_ssa(index); > > -- > 2.19.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev