On Wed, Feb 7, 2018 at 7:47 PM, Daniel Schürmann <daniel.schuerm...@campus.tu-berlin.de> wrote: > Signed-off-by: Daniel Schürmann <daniel.schuerm...@campus.tu-berlin.de> > --- > src/amd/common/ac_llvm_build.c | 25 +++++++++++++++++++++++++ > src/amd/common/ac_llvm_build.h | 8 ++++++++ > src/amd/common/ac_nir_to_llvm.c | 14 ++++++++++++-- > 3 files changed, 45 insertions(+), 2 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > index d0b288f51d..197dc40eae 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -1046,6 +1046,31 @@ LLVMValueRef ac_build_buffer_load_format(struct > ac_llvm_context *ctx, > can_speculate, true); > } > > +LLVMValueRef > +ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, > + LLVMValueRef rsrc, > + LLVMValueRef vindex, > + LLVMValueRef voffset, > + LLVMValueRef soffset, > + LLVMValueRef immoffset) > +{ > + const char *name = "llvm.amdgcn.tbuffer.load.i32"; > + LLVMTypeRef type = ctx->i32; > + LLVMValueRef params[] = { > + rsrc, > + vindex, > + voffset, > + soffset, > + immoffset, > + LLVMConstInt(ctx->i32, > V_008F0C_BUF_DATA_FORMAT_16, false), > + LLVMConstInt(ctx->i32, > V_008F0C_BUF_NUM_FORMAT_UINT, false), > + ctx->i1false, > + ctx->i1false, > + }; > + LLVMValueRef res = ac_build_intrinsic(ctx, name, type, params, 9, 0); > + return LLVMBuildTrunc(ctx->builder, res, ctx->i16, ""); > +} > + > /** > * Set range metadata on an instruction. This can only be used on load and > * call instructions. 
If you know an instruction can only produce the values > diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h > index 47c843fb4b..da49b06502 100644 > --- a/src/amd/common/ac_llvm_build.h > +++ b/src/amd/common/ac_llvm_build.h > @@ -225,6 +225,14 @@ LLVMValueRef ac_build_buffer_load_format(struct > ac_llvm_context *ctx, > bool glc, > bool can_speculate); > > +LLVMValueRef > +ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, > + LLVMValueRef rsrc, > + LLVMValueRef vindex, > + LLVMValueRef voffset, > + LLVMValueRef soffset, > + LLVMValueRef immoffset); > + > LLVMValueRef > ac_get_thread_id(struct ac_llvm_context *ctx); > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 31f16d3acf..2b48e92949 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -2697,9 +2697,19 @@ static LLVMValueRef visit_load_ubo_buffer(struct > ac_nir_context *ctx, > if (instr->dest.ssa.bit_size == 64) > num_components *= 2; > > - ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, > offset, > + if (instr->dest.ssa.bit_size == 16) { > + LLVMValueRef results[num_components]; > + for (unsigned i = 0; i < num_components; ++i) { > + results[i] = ac_build_tbuffer_load_short(&ctx->ac, > rsrc, > + > ctx->ac.i32_0, ctx->ac.i32_0, > + offset, > LLVMConstInt(ctx->ac.i32, 2 * i, 0)); > + }
For num_components >= 2, it would be better to load each pair of 16-bit components as a single i32 and then bitcast it to v2i16, assuming the offset is aligned to 4. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev