On Thu, Jan 4, 2018 at 10:25 AM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > How about performance? > > A few weeks ago, I fixed a bug (5f81a43535e8512cef26ea3dcd1e3a489bd5a1bb) > which affected F1 2017 and DOW3 on RADV, and it was also a nice performance > boost; this is why I'm asking.
No idea. This just decreases the number of instructions in some PS epilogs. Marek > > > On 01/04/2018 01:55 AM, Marek Olšák wrote: >> >> From: Marek Olšák <marek.ol...@amd.com> >> >> --- >> src/amd/common/ac_llvm_build.c | 164 >> +++++++++++++++++++++++++++++++ >> src/amd/common/ac_llvm_build.h | 13 +++ >> src/gallium/drivers/radeonsi/si_shader.c | 152 >> ++++++++-------------------- >> 3 files changed, 216 insertions(+), 113 deletions(-) >> >> diff --git a/src/amd/common/ac_llvm_build.c >> b/src/amd/common/ac_llvm_build.c >> index 7100e52..c48a186 100644 >> --- a/src/amd/common/ac_llvm_build.c >> +++ b/src/amd/common/ac_llvm_build.c >> @@ -61,20 +61,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, >> LLVMContextRef context, >> ctx->voidt = LLVMVoidTypeInContext(ctx->context); >> ctx->i1 = LLVMInt1TypeInContext(ctx->context); >> ctx->i8 = LLVMInt8TypeInContext(ctx->context); >> ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); >> ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); >> ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); >> ctx->intptr = HAVE_32BIT_POINTERS ? 
ctx->i32 : ctx->i64; >> ctx->f16 = LLVMHalfTypeInContext(ctx->context); >> ctx->f32 = LLVMFloatTypeInContext(ctx->context); >> ctx->f64 = LLVMDoubleTypeInContext(ctx->context); >> + ctx->v2i16 = LLVMVectorType(ctx->i16, 2); >> ctx->v2i32 = LLVMVectorType(ctx->i32, 2); >> ctx->v3i32 = LLVMVectorType(ctx->i32, 3); >> ctx->v4i32 = LLVMVectorType(ctx->i32, 4); >> ctx->v2f32 = LLVMVectorType(ctx->f32, 2); >> ctx->v4f32 = LLVMVectorType(ctx->f32, 4); >> ctx->v8i32 = LLVMVectorType(ctx->i32, 8); >> ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false); >> ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false); >> ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0); >> @@ -1214,20 +1215,34 @@ LLVMValueRef ac_build_fmin(struct ac_llvm_context >> *ctx, LLVMValueRef a, >> } >> LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef >> a, >> LLVMValueRef b) >> { >> LLVMValueRef args[2] = {a, b}; >> return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32, args, >> 2, >> AC_FUNC_ATTR_READNONE); >> } >> +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, >> + LLVMValueRef b) >> +{ >> + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, >> ""); >> + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); >> +} >> + >> +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, >> + LLVMValueRef b) >> +{ >> + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, >> ""); >> + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); >> +} >> + >> LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, >> LLVMValueRef b) >> { >> LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, >> ""); >> return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); >> } >> LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef >> value) >> { >> if (HAVE_LLVM >= 0x0500) { >> @@ -1439,20 +1454,169 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct >> ac_llvm_context *ctx, >> v2f16, args, 2, >> AC_FUNC_ATTR_READNONE); >> return 
LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); >> } >> return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, >> 2, >> AC_FUNC_ATTR_READNONE | >> AC_FUNC_ATTR_LEGACY); >> } >> +/* Upper 16 bits must be zero. */ >> +static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx, >> + LLVMValueRef val[2]) >> +{ >> + return LLVMBuildOr(ctx->builder, val[0], >> + LLVMBuildShl(ctx->builder, val[1], >> + LLVMConstInt(ctx->i32, 16, 0), >> + ""), ""); >> +} >> + >> +/* Upper 16 bits are ignored and will be dropped. */ >> +static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct >> ac_llvm_context *ctx, >> + LLVMValueRef val[2]) >> +{ >> + LLVMValueRef v[2] = { >> + LLVMBuildAnd(ctx->builder, val[0], >> + LLVMConstInt(ctx->i32, 0xffff, 0), ""), >> + val[1], >> + }; >> + return ac_llvm_pack_two_int16(ctx, v); >> +} >> + >> +LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, >> + LLVMValueRef args[2]) >> +{ >> + if (HAVE_LLVM >= 0x0600) { >> + LLVMValueRef res = >> + ac_build_intrinsic(ctx, >> "llvm.amdgcn.cvt.pknorm.i16", >> + ctx->v2i16, args, 2, >> + AC_FUNC_ATTR_READNONE); >> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); >> + } >> + >> + LLVMValueRef val[2]; >> + >> + for (int chan = 0; chan < 2; chan++) { >> + /* Clamp between [-1, 1]. */ >> + val[chan] = ac_build_fmin(ctx, args[chan], ctx->f32_1); >> + val[chan] = ac_build_fmax(ctx, val[chan], >> LLVMConstReal(ctx->f32, -1)); >> + /* Convert to a signed integer in [-32767, 32767]. */ >> + val[chan] = LLVMBuildFMul(ctx->builder, val[chan], >> + LLVMConstReal(ctx->f32, 32767), >> ""); >> + /* If positive, add 0.5, else add -0.5. 
*/ >> + val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], >> + LLVMBuildSelect(ctx->builder, >> + LLVMBuildFCmp(ctx->builder, >> LLVMRealOGE, >> + val[chan], >> ctx->f32_0, ""), >> + LLVMConstReal(ctx->f32, 0.5), >> + LLVMConstReal(ctx->f32, -0.5), >> ""), ""); >> + val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], >> ctx->i32, ""); >> + } >> + return ac_llvm_pack_two_int32_as_int16(ctx, val); >> +} >> + >> +LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, >> + LLVMValueRef args[2]) >> +{ >> + if (HAVE_LLVM >= 0x0600) { >> + LLVMValueRef res = >> + ac_build_intrinsic(ctx, >> "llvm.amdgcn.cvt.pknorm.u16", >> + ctx->v2i16, args, 2, >> + AC_FUNC_ATTR_READNONE); >> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); >> + } >> + >> + LLVMValueRef val[2]; >> + >> + for (int chan = 0; chan < 2; chan++) { >> + val[chan] = ac_build_clamp(ctx, args[chan]); >> + val[chan] = LLVMBuildFMul(ctx->builder, val[chan], >> + LLVMConstReal(ctx->f32, 65535), >> ""); >> + val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], >> + LLVMConstReal(ctx->f32, 0.5), >> ""); >> + val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan], >> + ctx->i32, ""); >> + } >> + return ac_llvm_pack_two_int32_as_int16(ctx, val); >> +} >> + >> +/* The 8-bit and 10-bit clamping is for HW workarounds. */ >> +LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, >> + LLVMValueRef args[2], unsigned bits, bool >> hi) >> +{ >> + assert(bits == 8 || bits == 10 || bits == 16); >> + >> + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, >> + bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0); >> + LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, >> + bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0); >> + LLVMValueRef max_alpha = >> + bits != 10 ? max_rgb : ctx->i32_1; >> + LLVMValueRef min_alpha = >> + bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0); >> + bool has_intrinsic = HAVE_LLVM >= 0x0600; >> + >> + /* Clamp. 
*/ >> + if (!has_intrinsic || bits != 16) { >> + for (int i = 0; i < 2; i++) { >> + bool alpha = hi && i == 1; >> + args[i] = ac_build_imin(ctx, args[i], >> + alpha ? max_alpha : >> max_rgb); >> + args[i] = ac_build_imax(ctx, args[i], >> + alpha ? min_alpha : >> min_rgb); >> + } >> + } >> + >> + if (has_intrinsic) { >> + LLVMValueRef res = >> + ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", >> + ctx->v2i16, args, 2, >> + AC_FUNC_ATTR_READNONE); >> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); >> + } >> + >> + return ac_llvm_pack_two_int32_as_int16(ctx, args); >> +} >> + >> +/* The 8-bit and 10-bit clamping is for HW workarounds. */ >> +LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, >> + LLVMValueRef args[2], unsigned bits, bool >> hi) >> +{ >> + assert(bits == 8 || bits == 10 || bits == 16); >> + >> + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, >> + bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0); >> + LLVMValueRef max_alpha = >> + bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0); >> + bool has_intrinsic = HAVE_LLVM >= 0x0600; >> + >> + /* Clamp. */ >> + if (!has_intrinsic || bits != 16) { >> + for (int i = 0; i < 2; i++) { >> + bool alpha = hi && i == 1; >> + args[i] = ac_build_umin(ctx, args[i], >> + alpha ? 
max_alpha : >> max_rgb); >> + } >> + } >> + >> + if (has_intrinsic) { >> + LLVMValueRef res = >> + ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", >> + ctx->v2i16, args, 2, >> + AC_FUNC_ATTR_READNONE); >> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); >> + } >> + >> + return ac_llvm_pack_two_int16(ctx, args); >> +} >> + >> LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef >> i1) >> { >> assert(HAVE_LLVM >= 0x0600); >> return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, >> &i1, 1, AC_FUNC_ATTR_READNONE); >> } >> void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef >> i1) >> { >> if (HAVE_LLVM >= 0x0600) { >> diff --git a/src/amd/common/ac_llvm_build.h >> b/src/amd/common/ac_llvm_build.h >> index 0deb5b5..3f0e9e2 100644 >> --- a/src/amd/common/ac_llvm_build.h >> +++ b/src/amd/common/ac_llvm_build.h >> @@ -50,20 +50,21 @@ struct ac_llvm_context { >> LLVMTypeRef voidt; >> LLVMTypeRef i1; >> LLVMTypeRef i8; >> LLVMTypeRef i16; >> LLVMTypeRef i32; >> LLVMTypeRef i64; >> LLVMTypeRef intptr; >> LLVMTypeRef f16; >> LLVMTypeRef f32; >> LLVMTypeRef f64; >> + LLVMTypeRef v2i16; >> LLVMTypeRef v2i32; >> LLVMTypeRef v3i32; >> LLVMTypeRef v4i32; >> LLVMTypeRef v2f32; >> LLVMTypeRef v4f32; >> LLVMTypeRef v8i32; >> LLVMValueRef i32_0; >> LLVMValueRef i32_1; >> LLVMValueRef f32_0; >> @@ -238,20 +239,24 @@ LLVMValueRef ac_build_imsb(struct ac_llvm_context >> *ctx, >> LLVMValueRef arg, >> LLVMTypeRef dst_type); >> LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, >> LLVMValueRef arg, >> LLVMTypeRef dst_type); >> LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, >> LLVMValueRef b); >> LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, >> LLVMValueRef b); >> +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, >> + LLVMValueRef b); >> +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, >> + LLVMValueRef b); >> LLVMValueRef 
ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, >> LLVMValueRef b); >> LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef >> value); >> struct ac_export_args { >> LLVMValueRef out[4]; >> unsigned target; >> unsigned enabled_channels; >> bool compr; >> bool done; >> bool valid_mask; >> @@ -282,20 +287,28 @@ struct ac_image_args { >> LLVMValueRef addr; >> unsigned dmask; >> bool unorm; >> bool da; >> }; >> LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, >> struct ac_image_args *a); >> LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, >> LLVMValueRef args[2]); >> +LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, >> + LLVMValueRef args[2]); >> +LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, >> + LLVMValueRef args[2]); >> +LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, >> + LLVMValueRef args[2], unsigned bits, bool >> hi); >> +LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, >> + LLVMValueRef args[2], unsigned bits, bool >> hi); >> LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef >> i1); >> void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef >> i1); >> LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef >> input, >> LLVMValueRef offset, LLVMValueRef width, >> bool is_signed); >> void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16); >> void ac_get_image_intr_name(const char *base_name, >> LLVMTypeRef data_type, >> diff --git a/src/gallium/drivers/radeonsi/si_shader.c >> b/src/gallium/drivers/radeonsi/si_shader.c >> index 453822c..a695aad 100644 >> --- a/src/gallium/drivers/radeonsi/si_shader.c >> +++ b/src/gallium/drivers/radeonsi/si_shader.c >> @@ -2093,51 +2093,27 @@ static LLVMValueRef fetch_constant( >> >> ctx->num_const_buffers); >> index = LLVMBuildAdd(ctx->ac.builder, index, >> LLVMConstInt(ctx->i32, >> SI_NUM_SHADER_BUFFERS, 0), ""); >> bufp = 
ac_build_load_to_sgpr(&ctx->ac, ptr, index); >> } else >> bufp = load_const_buffer_desc(ctx, buf); >> return bitcast(bld_base, type, buffer_load_const(ctx, bufp, >> addr)); >> } >> -/* Upper 16 bits must be zero. */ >> -static LLVMValueRef si_llvm_pack_two_int16(struct si_shader_context *ctx, >> - LLVMValueRef val[2]) >> -{ >> - return LLVMBuildOr(ctx->ac.builder, val[0], >> - LLVMBuildShl(ctx->ac.builder, val[1], >> - LLVMConstInt(ctx->i32, 16, 0), >> - ""), ""); >> -} >> - >> -/* Upper 16 bits are ignored and will be dropped. */ >> -static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct >> si_shader_context *ctx, >> - LLVMValueRef val[2]) >> -{ >> - LLVMValueRef v[2] = { >> - LLVMBuildAnd(ctx->ac.builder, val[0], >> - LLVMConstInt(ctx->i32, 0xffff, 0), ""), >> - val[1], >> - }; >> - return si_llvm_pack_two_int16(ctx, v); >> -} >> - >> /* Initialize arguments for the shader export intrinsic */ >> static void si_llvm_init_export_args(struct si_shader_context *ctx, >> LLVMValueRef *values, >> unsigned target, >> struct ac_export_args *args) >> { >> LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32); >> - LLVMBuilderRef builder = ctx->ac.builder; >> - LLVMValueRef val[4]; >> unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR; >> unsigned chan; >> bool is_int8, is_int10; >> /* Default is 0xf. Adjusted below depending on the format. 
*/ >> args->enabled_channels = 0xf; /* writemask */ >> /* Specify whether the EXEC mask represents the valid mask */ >> args->valid_mask = 0; >> @@ -2157,20 +2133,24 @@ static void si_llvm_init_export_args(struct >> si_shader_context *ctx, >> is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & >> 0x1; >> is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) & >> 0x1; >> } >> args->compr = false; >> args->out[0] = f32undef; >> args->out[1] = f32undef; >> args->out[2] = f32undef; >> args->out[3] = f32undef; >> + LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef >> args[2]) = NULL; >> + LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef >> args[2], >> + unsigned bits, bool hi) = NULL; >> + >> switch (spi_shader_col_format) { >> case V_028714_SPI_SHADER_ZERO: >> args->enabled_channels = 0; /* writemask */ >> args->target = V_008DFC_SQ_EXP_NULL; >> break; >> case V_028714_SPI_SHADER_32_R: >> args->enabled_channels = 1; /* writemask */ >> args->out[0] = values[0]; >> break; >> @@ -2181,127 +2161,73 @@ static void si_llvm_init_export_args(struct >> si_shader_context *ctx, >> args->out[1] = values[1]; >> break; >> case V_028714_SPI_SHADER_32_AR: >> args->enabled_channels = 0x9; /* writemask */ >> args->out[0] = values[0]; >> args->out[3] = values[3]; >> break; >> case V_028714_SPI_SHADER_FP16_ABGR: >> - args->compr = 1; /* COMPR flag */ >> - >> - for (chan = 0; chan < 2; chan++) { >> - LLVMValueRef pack_args[2] = { >> - values[2 * chan], >> - values[2 * chan + 1] >> - }; >> - LLVMValueRef packed; >> - >> - packed = ac_build_cvt_pkrtz_f16(&ctx->ac, >> pack_args); >> - args->out[chan] = ac_to_float(&ctx->ac, packed); >> - } >> + packf = ac_build_cvt_pkrtz_f16; >> break; >> case V_028714_SPI_SHADER_UNORM16_ABGR: >> - for (chan = 0; chan < 4; chan++) { >> - val[chan] = ac_build_clamp(&ctx->ac, >> values[chan]); >> - val[chan] = LLVMBuildFMul(builder, val[chan], >> - LLVMConstReal(ctx->f32, >> 65535), ""); >> - val[chan] = LLVMBuildFAdd(builder, 
val[chan], >> - LLVMConstReal(ctx->f32, >> 0.5), ""); >> - val[chan] = LLVMBuildFPToUI(builder, val[chan], >> - ctx->i32, ""); >> - } >> - >> - args->compr = 1; /* COMPR flag */ >> - args->out[0] = ac_to_float(&ctx->ac, >> si_llvm_pack_two_int16(ctx, val)); >> - args->out[1] = ac_to_float(&ctx->ac, >> si_llvm_pack_two_int16(ctx, val+2)); >> + packf = ac_build_cvt_pknorm_u16; >> break; >> case V_028714_SPI_SHADER_SNORM16_ABGR: >> - for (chan = 0; chan < 4; chan++) { >> - /* Clamp between [-1, 1]. */ >> - val[chan] = >> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MIN, >> - >> values[chan], >> - >> LLVMConstReal(ctx->f32, 1)); >> - val[chan] = >> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MAX, >> - val[chan], >> - >> LLVMConstReal(ctx->f32, -1)); >> - /* Convert to a signed integer in [-32767, 32767]. >> */ >> - val[chan] = LLVMBuildFMul(builder, val[chan], >> - LLVMConstReal(ctx->f32, >> 32767), ""); >> - /* If positive, add 0.5, else add -0.5. */ >> - val[chan] = LLVMBuildFAdd(builder, val[chan], >> - LLVMBuildSelect(builder, >> - LLVMBuildFCmp(builder, >> LLVMRealOGE, >> - val[chan], >> ctx->ac.f32_0, ""), >> - LLVMConstReal(ctx->f32, >> 0.5), >> - LLVMConstReal(ctx->f32, >> -0.5), ""), ""); >> - val[chan] = LLVMBuildFPToSI(builder, val[chan], >> ctx->i32, ""); >> - } >> - >> - args->compr = 1; /* COMPR flag */ >> - args->out[0] = ac_to_float(&ctx->ac, >> si_llvm_pack_two_int32_as_int16(ctx, val)); >> - args->out[1] = ac_to_float(&ctx->ac, >> si_llvm_pack_two_int32_as_int16(ctx, val+2)); >> + packf = ac_build_cvt_pknorm_i16; >> break; >> - case V_028714_SPI_SHADER_UINT16_ABGR: { >> - LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, >> - is_int8 ? 255 : is_int10 ? 1023 : 65535, 0); >> - LLVMValueRef max_alpha = >> - !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3, >> 0); >> + case V_028714_SPI_SHADER_UINT16_ABGR: >> + packi = ac_build_cvt_pk_u16; >> + break; >> - /* Clamp. 
*/ >> - for (chan = 0; chan < 4; chan++) { >> - val[chan] = ac_to_integer(&ctx->ac, values[chan]); >> - val[chan] = >> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_UMIN, >> - val[chan], >> - chan == 3 ? max_alpha : max_rgb); >> - } >> + case V_028714_SPI_SHADER_SINT16_ABGR: >> + packi = ac_build_cvt_pk_i16; >> + break; >> - args->compr = 1; /* COMPR flag */ >> - args->out[0] = ac_to_float(&ctx->ac, >> si_llvm_pack_two_int16(ctx, val)); >> - args->out[1] = ac_to_float(&ctx->ac, >> si_llvm_pack_two_int16(ctx, val+2)); >> + case V_028714_SPI_SHADER_32_ABGR: >> + memcpy(&args->out[0], values, sizeof(values[0]) * 4); >> break; >> } >> - case V_028714_SPI_SHADER_SINT16_ABGR: { >> - LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, >> - is_int8 ? 127 : is_int10 ? 511 : 32767, 0); >> - LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, >> - is_int8 ? -128 : is_int10 ? -512 : -32768, 0); >> - LLVMValueRef max_alpha = >> - !is_int10 ? max_rgb : ctx->i32_1; >> - LLVMValueRef min_alpha = >> - !is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2, >> 0); >> + /* Pack f16 or norm_i16/u16. */ >> + if (packf) { >> + for (chan = 0; chan < 2; chan++) { >> + LLVMValueRef pack_args[2] = { >> + values[2 * chan], >> + values[2 * chan + 1] >> + }; >> + LLVMValueRef packed; >> - /* Clamp. */ >> - for (chan = 0; chan < 4; chan++) { >> - val[chan] = ac_to_integer(&ctx->ac, values[chan]); >> - val[chan] = >> lp_build_emit_llvm_binary(&ctx->bld_base, >> - TGSI_OPCODE_IMIN, >> - val[chan], chan == 3 ? max_alpha : >> max_rgb); >> - val[chan] = >> lp_build_emit_llvm_binary(&ctx->bld_base, >> - TGSI_OPCODE_IMAX, >> - val[chan], chan == 3 ? 
min_alpha : >> min_rgb); >> + packed = packf(&ctx->ac, pack_args); >> + args->out[chan] = ac_to_float(&ctx->ac, packed); >> } >> - >> args->compr = 1; /* COMPR flag */ >> - args->out[0] = ac_to_float(&ctx->ac, >> si_llvm_pack_two_int32_as_int16(ctx, val)); >> - args->out[1] = ac_to_float(&ctx->ac, >> si_llvm_pack_two_int32_as_int16(ctx, val+2)); >> - break; >> } >> + /* Pack i16/u16. */ >> + if (packi) { >> + for (chan = 0; chan < 2; chan++) { >> + LLVMValueRef pack_args[2] = { >> + ac_to_integer(&ctx->ac, values[2 * chan]), >> + ac_to_integer(&ctx->ac, values[2 * chan + >> 1]) >> + }; >> + LLVMValueRef packed; >> - case V_028714_SPI_SHADER_32_ABGR: >> - memcpy(&args->out[0], values, sizeof(values[0]) * 4); >> - break; >> + packed = packi(&ctx->ac, pack_args, >> + is_int8 ? 8 : is_int10 ? 10 : 16, >> + chan == 1); >> + args->out[chan] = ac_to_float(&ctx->ac, packed); >> + } >> + args->compr = 1; /* COMPR flag */ >> } >> } >> static void si_alpha_test(struct lp_build_tgsi_context *bld_base, >> LLVMValueRef alpha) >> { >> struct si_shader_context *ctx = si_shader_context(bld_base); >> if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) >> { >> static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = >> { >> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev