On 01/05/2018 02:45 AM, Marek Olšák wrote:
On Thu, Jan 4, 2018 at 10:25 AM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
How about performance?

A few weeks ago, I fixed a bug (5f81a43535e8512cef26ea3dcd1e3a489bd5a1bb)
that affected F1 2017 and DOW3 on RADV, and the fix was also a nice
performance boost; this is why I'm asking.

No idea. This just decreases the number of instructions in some PS epilogs.

Okay. The series no longer applies on master; do you have a branch somewhere? I would like to run at least F1 and DOW3.


Marek



On 01/04/2018 01:55 AM, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
   src/amd/common/ac_llvm_build.c           | 164
+++++++++++++++++++++++++++++++
   src/amd/common/ac_llvm_build.h           |  13 +++
   src/gallium/drivers/radeonsi/si_shader.c | 152
++++++++--------------------
   3 files changed, 216 insertions(+), 113 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c
b/src/amd/common/ac_llvm_build.c
index 7100e52..c48a186 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -61,20 +61,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
LLVMContextRef context,
         ctx->voidt = LLVMVoidTypeInContext(ctx->context);
         ctx->i1 = LLVMInt1TypeInContext(ctx->context);
         ctx->i8 = LLVMInt8TypeInContext(ctx->context);
         ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
         ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
         ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
         ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64;
         ctx->f16 = LLVMHalfTypeInContext(ctx->context);
         ctx->f32 = LLVMFloatTypeInContext(ctx->context);
         ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
+       ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
         ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
         ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
         ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
         ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
         ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
         ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
@@ -1214,20 +1215,34 @@ LLVMValueRef ac_build_fmin(struct ac_llvm_context
*ctx, LLVMValueRef a,
   }
     LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef
a,
                            LLVMValueRef b)
   {
         LLVMValueRef args[2] = {a, b};
         return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32, args,
2,
                                   AC_FUNC_ATTR_READNONE);
   }
   +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
+                          LLVMValueRef b)
+{
+       LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b,
"");
+       return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
+                          LLVMValueRef b)
+{
+       LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b,
"");
+       return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
   LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
                            LLVMValueRef b)
   {
         LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b,
"");
         return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
   }
     LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef
value)
   {
         if (HAVE_LLVM >= 0x0500) {
@@ -1439,20 +1454,169 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct
ac_llvm_context *ctx,
                                            v2f16, args, 2,
                                            AC_FUNC_ATTR_READNONE);
                 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
         }
         return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args,
2,
                                   AC_FUNC_ATTR_READNONE |
                                   AC_FUNC_ATTR_LEGACY);
   }
   +/* Upper 16 bits must be zero. */
+static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx,
+                                          LLVMValueRef val[2])
+{
+       return LLVMBuildOr(ctx->builder, val[0],
+                          LLVMBuildShl(ctx->builder, val[1],
+                                       LLVMConstInt(ctx->i32, 16, 0),
+                                       ""), "");
+}
+
+/* Upper 16 bits are ignored and will be dropped. */
+static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct
ac_llvm_context *ctx,
+                                                   LLVMValueRef val[2])
+{
+       LLVMValueRef v[2] = {
+               LLVMBuildAnd(ctx->builder, val[0],
+                            LLVMConstInt(ctx->i32, 0xffff, 0), ""),
+               val[1],
+       };
+       return ac_llvm_pack_two_int16(ctx, v);
+}
+
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
+                                    LLVMValueRef args[2])
+{
+       if (HAVE_LLVM >= 0x0600) {
+               LLVMValueRef res =
+                       ac_build_intrinsic(ctx,
"llvm.amdgcn.cvt.pknorm.i16",
+                                          ctx->v2i16, args, 2,
+                                          AC_FUNC_ATTR_READNONE);
+               return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+       }
+
+       LLVMValueRef val[2];
+
+       for (int chan = 0; chan < 2; chan++) {
+               /* Clamp between [-1, 1]. */
+               val[chan] = ac_build_fmin(ctx, args[chan], ctx->f32_1);
+               val[chan] = ac_build_fmax(ctx, val[chan],
LLVMConstReal(ctx->f32, -1));
+               /* Convert to a signed integer in [-32767, 32767]. */
+               val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
+                                         LLVMConstReal(ctx->f32, 32767),
"");
+               /* If positive, add 0.5, else add -0.5. */
+               val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
+                               LLVMBuildSelect(ctx->builder,
+                                       LLVMBuildFCmp(ctx->builder,
LLVMRealOGE,
+                                                     val[chan],
ctx->f32_0, ""),
+                                       LLVMConstReal(ctx->f32, 0.5),
+                                       LLVMConstReal(ctx->f32, -0.5),
""), "");
+               val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan],
ctx->i32, "");
+       }
+       return ac_llvm_pack_two_int32_as_int16(ctx, val);
+}
+
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
+                                    LLVMValueRef args[2])
+{
+       if (HAVE_LLVM >= 0x0600) {
+               LLVMValueRef res =
+                       ac_build_intrinsic(ctx,
"llvm.amdgcn.cvt.pknorm.u16",
+                                          ctx->v2i16, args, 2,
+                                          AC_FUNC_ATTR_READNONE);
+               return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+       }
+
+       LLVMValueRef val[2];
+
+       for (int chan = 0; chan < 2; chan++) {
+               val[chan] = ac_build_clamp(ctx, args[chan]);
+               val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
+                                         LLVMConstReal(ctx->f32, 65535),
"");
+               val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
+                                         LLVMConstReal(ctx->f32, 0.5),
"");
+               val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
+                                           ctx->i32, "");
+       }
+       return ac_llvm_pack_two_int32_as_int16(ctx, val);
+}
+
+/* The 8-bit and 10-bit clamping is for HW workarounds. */
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
+                                LLVMValueRef args[2], unsigned bits, bool
hi)
+{
+       assert(bits == 8 || bits == 10 || bits == 16);
+
+       LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+               bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
+       LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
+               bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
+       LLVMValueRef max_alpha =
+               bits != 10 ? max_rgb : ctx->i32_1;
+       LLVMValueRef min_alpha =
+               bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
+       bool has_intrinsic = HAVE_LLVM >= 0x0600;
+
+       /* Clamp. */
+       if (!has_intrinsic || bits != 16) {
+               for (int i = 0; i < 2; i++) {
+                       bool alpha = hi && i == 1;
+                       args[i] = ac_build_imin(ctx, args[i],
+                                               alpha ? max_alpha :
max_rgb);
+                       args[i] = ac_build_imax(ctx, args[i],
+                                               alpha ? min_alpha :
min_rgb);
+               }
+       }
+
+       if (has_intrinsic) {
+               LLVMValueRef res =
+                       ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
+                                          ctx->v2i16, args, 2,
+                                          AC_FUNC_ATTR_READNONE);
+               return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+       }
+
+       return ac_llvm_pack_two_int32_as_int16(ctx, args);
+}
+
+/* The 8-bit and 10-bit clamping is for HW workarounds. */
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
+                                LLVMValueRef args[2], unsigned bits, bool
hi)
+{
+       assert(bits == 8 || bits == 10 || bits == 16);
+
+       LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+               bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
+       LLVMValueRef max_alpha =
+               bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
+       bool has_intrinsic = HAVE_LLVM >= 0x0600;
+
+       /* Clamp. */
+       if (!has_intrinsic || bits != 16) {
+               for (int i = 0; i < 2; i++) {
+                       bool alpha = hi && i == 1;
+                       args[i] = ac_build_umin(ctx, args[i],
+                                               alpha ? max_alpha :
max_rgb);
+               }
+       }
+
+       if (has_intrinsic) {
+               LLVMValueRef res =
+                       ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
+                                          ctx->v2i16, args, 2,
+                                          AC_FUNC_ATTR_READNONE);
+               return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+       }
+
+       return ac_llvm_pack_two_int16(ctx, args);
+}
+
   LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef
i1)
   {
         assert(HAVE_LLVM >= 0x0600);
         return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
                                   &i1, 1, AC_FUNC_ATTR_READNONE);
   }
     void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef
i1)
   {
         if (HAVE_LLVM >= 0x0600) {
diff --git a/src/amd/common/ac_llvm_build.h
b/src/amd/common/ac_llvm_build.h
index 0deb5b5..3f0e9e2 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -50,20 +50,21 @@ struct ac_llvm_context {
         LLVMTypeRef voidt;
         LLVMTypeRef i1;
         LLVMTypeRef i8;
         LLVMTypeRef i16;
         LLVMTypeRef i32;
         LLVMTypeRef i64;
         LLVMTypeRef intptr;
         LLVMTypeRef f16;
         LLVMTypeRef f32;
         LLVMTypeRef f64;
+       LLVMTypeRef v2i16;
         LLVMTypeRef v2i32;
         LLVMTypeRef v3i32;
         LLVMTypeRef v4i32;
         LLVMTypeRef v2f32;
         LLVMTypeRef v4f32;
         LLVMTypeRef v8i32;
         LLVMValueRef i32_0;
         LLVMValueRef i32_1;
         LLVMValueRef f32_0;
@@ -238,20 +239,24 @@ LLVMValueRef ac_build_imsb(struct ac_llvm_context
*ctx,
                            LLVMValueRef arg,
                            LLVMTypeRef dst_type);
     LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
                           LLVMValueRef arg,
                           LLVMTypeRef dst_type);
   LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
                            LLVMValueRef b);
   LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
                            LLVMValueRef b);
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
+                          LLVMValueRef b);
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
+                          LLVMValueRef b);
   LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b);
   LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef
value);
     struct ac_export_args {
         LLVMValueRef out[4];
           unsigned target;
           unsigned enabled_channels;
           bool compr;
           bool done;
           bool valid_mask;
@@ -282,20 +287,28 @@ struct ac_image_args {
         LLVMValueRef addr;
         unsigned dmask;
         bool unorm;
         bool da;
   };
     LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
                                    struct ac_image_args *a);
   LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
                                     LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
+                                    LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
+                                    LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
+                                LLVMValueRef args[2], unsigned bits, bool
hi);
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
+                                LLVMValueRef args[2], unsigned bits, bool
hi);
   LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef
i1);
   void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef
i1);
   LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef
input,
                           LLVMValueRef offset, LLVMValueRef width,
                           bool is_signed);
     void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
     void ac_get_image_intr_name(const char *base_name,
                             LLVMTypeRef data_type,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c
b/src/gallium/drivers/radeonsi/si_shader.c
index 453822c..a695aad 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2093,51 +2093,27 @@ static LLVMValueRef fetch_constant(

ctx->num_const_buffers);
                 index = LLVMBuildAdd(ctx->ac.builder, index,
                                      LLVMConstInt(ctx->i32,
SI_NUM_SHADER_BUFFERS, 0), "");
                 bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
         } else
                 bufp = load_const_buffer_desc(ctx, buf);
         return bitcast(bld_base, type, buffer_load_const(ctx, bufp,
addr));
   }
   -/* Upper 16 bits must be zero. */
-static LLVMValueRef si_llvm_pack_two_int16(struct si_shader_context *ctx,
-                                          LLVMValueRef val[2])
-{
-       return LLVMBuildOr(ctx->ac.builder, val[0],
-                          LLVMBuildShl(ctx->ac.builder, val[1],
-                                       LLVMConstInt(ctx->i32, 16, 0),
-                                       ""), "");
-}
-
-/* Upper 16 bits are ignored and will be dropped. */
-static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct
si_shader_context *ctx,
-                                                   LLVMValueRef val[2])
-{
-       LLVMValueRef v[2] = {
-               LLVMBuildAnd(ctx->ac.builder, val[0],
-                            LLVMConstInt(ctx->i32, 0xffff, 0), ""),
-               val[1],
-       };
-       return si_llvm_pack_two_int16(ctx, v);
-}
-
   /* Initialize arguments for the shader export intrinsic */
   static void si_llvm_init_export_args(struct si_shader_context *ctx,
                                      LLVMValueRef *values,
                                      unsigned target,
                                      struct ac_export_args *args)
   {
         LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
-       LLVMBuilderRef builder = ctx->ac.builder;
-       LLVMValueRef val[4];
         unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
         unsigned chan;
         bool is_int8, is_int10;
         /* Default is 0xf. Adjusted below depending on the format. */
         args->enabled_channels = 0xf; /* writemask */
         /* Specify whether the EXEC mask represents the valid mask */
         args->valid_mask = 0;
   @@ -2157,20 +2133,24 @@ static void si_llvm_init_export_args(struct
si_shader_context *ctx,
                 is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) &
0x1;
                 is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) &
0x1;
         }
         args->compr = false;
         args->out[0] = f32undef;
         args->out[1] = f32undef;
         args->out[2] = f32undef;
         args->out[3] = f32undef;
   +     LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef
args[2]) = NULL;
+       LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef
args[2],
+                             unsigned bits, bool hi) = NULL;
+
         switch (spi_shader_col_format) {
         case V_028714_SPI_SHADER_ZERO:
                 args->enabled_channels = 0; /* writemask */
                 args->target = V_008DFC_SQ_EXP_NULL;
                 break;
         case V_028714_SPI_SHADER_32_R:
                 args->enabled_channels = 1; /* writemask */
                 args->out[0] = values[0];
                 break;
@@ -2181,127 +2161,73 @@ static void si_llvm_init_export_args(struct
si_shader_context *ctx,
                 args->out[1] = values[1];
                 break;
         case V_028714_SPI_SHADER_32_AR:
                 args->enabled_channels = 0x9; /* writemask */
                 args->out[0] = values[0];
                 args->out[3] = values[3];
                 break;
         case V_028714_SPI_SHADER_FP16_ABGR:
-               args->compr = 1; /* COMPR flag */
-
-               for (chan = 0; chan < 2; chan++) {
-                       LLVMValueRef pack_args[2] = {
-                               values[2 * chan],
-                               values[2 * chan + 1]
-                       };
-                       LLVMValueRef packed;
-
-                       packed = ac_build_cvt_pkrtz_f16(&ctx->ac,
pack_args);
-                       args->out[chan] = ac_to_float(&ctx->ac, packed);
-               }
+               packf = ac_build_cvt_pkrtz_f16;
                 break;
         case V_028714_SPI_SHADER_UNORM16_ABGR:
-               for (chan = 0; chan < 4; chan++) {
-                       val[chan] = ac_build_clamp(&ctx->ac,
values[chan]);
-                       val[chan] = LLVMBuildFMul(builder, val[chan],
-                                                 LLVMConstReal(ctx->f32,
65535), "");
-                       val[chan] = LLVMBuildFAdd(builder, val[chan],
-                                                 LLVMConstReal(ctx->f32,
0.5), "");
-                       val[chan] = LLVMBuildFPToUI(builder, val[chan],
-                                                   ctx->i32, "");
-               }
-
-               args->compr = 1; /* COMPR flag */
-               args->out[0] = ac_to_float(&ctx->ac,
si_llvm_pack_two_int16(ctx, val));
-               args->out[1] = ac_to_float(&ctx->ac,
si_llvm_pack_two_int16(ctx, val+2));
+               packf = ac_build_cvt_pknorm_u16;
                 break;
         case V_028714_SPI_SHADER_SNORM16_ABGR:
-               for (chan = 0; chan < 4; chan++) {
-                       /* Clamp between [-1, 1]. */
-                       val[chan] =
lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MIN,
-
values[chan],
-
LLVMConstReal(ctx->f32, 1));
-                       val[chan] =
lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MAX,
-                                                             val[chan],
-
LLVMConstReal(ctx->f32, -1));
-                       /* Convert to a signed integer in [-32767, 32767].
*/
-                       val[chan] = LLVMBuildFMul(builder, val[chan],
-                                                 LLVMConstReal(ctx->f32,
32767), "");
-                       /* If positive, add 0.5, else add -0.5. */
-                       val[chan] = LLVMBuildFAdd(builder, val[chan],
-                                       LLVMBuildSelect(builder,
-                                               LLVMBuildFCmp(builder,
LLVMRealOGE,
-                                                             val[chan],
ctx->ac.f32_0, ""),
-                                               LLVMConstReal(ctx->f32,
0.5),
-                                               LLVMConstReal(ctx->f32,
-0.5), ""), "");
-                       val[chan] = LLVMBuildFPToSI(builder, val[chan],
ctx->i32, "");
-               }
-
-               args->compr = 1; /* COMPR flag */
-               args->out[0] = ac_to_float(&ctx->ac,
si_llvm_pack_two_int32_as_int16(ctx, val));
-               args->out[1] = ac_to_float(&ctx->ac,
si_llvm_pack_two_int32_as_int16(ctx, val+2));
+               packf = ac_build_cvt_pknorm_i16;
                 break;
   -     case V_028714_SPI_SHADER_UINT16_ABGR: {
-               LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
-                       is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
-               LLVMValueRef max_alpha =
-                       !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3,
0);
+       case V_028714_SPI_SHADER_UINT16_ABGR:
+               packi = ac_build_cvt_pk_u16;
+               break;
   -             /* Clamp. */
-               for (chan = 0; chan < 4; chan++) {
-                       val[chan] = ac_to_integer(&ctx->ac, values[chan]);
-                       val[chan] =
lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_UMIN,
-                                       val[chan],
-                                       chan == 3 ? max_alpha : max_rgb);
-               }
+       case V_028714_SPI_SHADER_SINT16_ABGR:
+               packi = ac_build_cvt_pk_i16;
+               break;
   -             args->compr = 1; /* COMPR flag */
-               args->out[0] = ac_to_float(&ctx->ac,
si_llvm_pack_two_int16(ctx, val));
-               args->out[1] = ac_to_float(&ctx->ac,
si_llvm_pack_two_int16(ctx, val+2));
+       case V_028714_SPI_SHADER_32_ABGR:
+               memcpy(&args->out[0], values, sizeof(values[0]) * 4);
                 break;
         }
   -     case V_028714_SPI_SHADER_SINT16_ABGR: {
-               LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
-                       is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
-               LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
-                       is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
-               LLVMValueRef max_alpha =
-                       !is_int10 ? max_rgb : ctx->i32_1;
-               LLVMValueRef min_alpha =
-                       !is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2,
0);
+       /* Pack f16 or norm_i16/u16. */
+       if (packf) {
+               for (chan = 0; chan < 2; chan++) {
+                       LLVMValueRef pack_args[2] = {
+                               values[2 * chan],
+                               values[2 * chan + 1]
+                       };
+                       LLVMValueRef packed;
   -             /* Clamp. */
-               for (chan = 0; chan < 4; chan++) {
-                       val[chan] = ac_to_integer(&ctx->ac, values[chan]);
-                       val[chan] =
lp_build_emit_llvm_binary(&ctx->bld_base,
-                                       TGSI_OPCODE_IMIN,
-                                       val[chan], chan == 3 ? max_alpha :
max_rgb);
-                       val[chan] =
lp_build_emit_llvm_binary(&ctx->bld_base,
-                                       TGSI_OPCODE_IMAX,
-                                       val[chan], chan == 3 ? min_alpha :
min_rgb);
+                       packed = packf(&ctx->ac, pack_args);
+                       args->out[chan] = ac_to_float(&ctx->ac, packed);
                 }
-
                 args->compr = 1; /* COMPR flag */
-               args->out[0] = ac_to_float(&ctx->ac,
si_llvm_pack_two_int32_as_int16(ctx, val));
-               args->out[1] = ac_to_float(&ctx->ac,
si_llvm_pack_two_int32_as_int16(ctx, val+2));
-               break;
         }
+       /* Pack i16/u16. */
+       if (packi) {
+               for (chan = 0; chan < 2; chan++) {
+                       LLVMValueRef pack_args[2] = {
+                               ac_to_integer(&ctx->ac, values[2 * chan]),
+                               ac_to_integer(&ctx->ac, values[2 * chan +
1])
+                       };
+                       LLVMValueRef packed;
   -     case V_028714_SPI_SHADER_32_ABGR:
-               memcpy(&args->out[0], values, sizeof(values[0]) * 4);
-               break;
+                       packed = packi(&ctx->ac, pack_args,
+                                      is_int8 ? 8 : is_int10 ? 10 : 16,
+                                      chan == 1);
+                       args->out[chan] = ac_to_float(&ctx->ac, packed);
+               }
+               args->compr = 1; /* COMPR flag */
         }
   }
     static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
                           LLVMValueRef alpha)
   {
         struct si_shader_context *ctx = si_shader_context(bld_base);
         if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER)
{
                 static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] =
{


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to