v2: add to patch series

Signed-off-by: Rhys Perry <pendingchao...@gmail.com>
---
 src/amd/common/ac_llvm_build.c           | 33 +++++++++++++++++-------
 src/amd/common/ac_llvm_build.h           |  3 ++-
 src/amd/common/ac_nir_to_llvm.c          | 14 +++++++---
 src/amd/vulkan/radv_nir_to_llvm.c        | 27 ++++++++++++++-----
 src/amd/vulkan/radv_pipeline.c           | 19 ++++++++------
 src/amd/vulkan/radv_shader.h             |  1 +
 src/gallium/drivers/radeonsi/si_shader.c |  2 +-
 7 files changed, 69 insertions(+), 30 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index dff369aae7f..be2c2251a21 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -937,27 +937,40 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
                   LLVMValueRef attr_number,
                   LLVMValueRef params,
                   LLVMValueRef i,
-                  LLVMValueRef j)
+                  LLVMValueRef j,
+                  int word)
 {
-       LLVMValueRef args[5];
+       LLVMValueRef args[6];
        LLVMValueRef p1;
 
        args[0] = i;
        args[1] = llvm_chan;
        args[2] = attr_number;
-       args[3] = params;
-
-       p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
-                               ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
+       if (word >= 0) {
+               args[3] = LLVMConstInt(ctx->i1, word, false);
+               args[4] = params;
+               p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
+                                       ctx->f16, args, 5, AC_FUNC_ATTR_READNONE);
+       } else {
+               args[3] = params;
+               p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
+                                       ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
+       }
 
        args[0] = p1;
        args[1] = j;
        args[2] = llvm_chan;
        args[3] = attr_number;
-       args[4] = params;
-
-       return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
-                                 ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
+       if (word >= 0) {
+               args[4] = LLVMConstInt(ctx->i1, word, false);
+               args[5] = params;
+               return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
+                                         ctx->f16, args, 6, AC_FUNC_ATTR_READNONE);
+       } else {
+               args[4] = params;
+               return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
+                                         ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
+       }
 }
 
 LLVMValueRef
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 61c9b5e4b6c..655427567c4 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -224,7 +224,8 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
                   LLVMValueRef attr_number,
                   LLVMValueRef params,
                   LLVMValueRef i,
-                  LLVMValueRef j);
+                  LLVMValueRef j,
+                  int word);
 
 LLVMValueRef
 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index bf7024c68e4..939b8eb13de 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3120,8 +3120,15 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
                                LLVMValueRef j = LLVMBuildExtractElement(
                                        ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
 
+                               /* This fp16 handling isn't technically correct
+                                * but should be correct for the attributes we
+                                * are actually going to use. */
+                               bool fp16 = instr->dest.ssa.bit_size == 16;
+                               int word = fp16 ? 0 : -1;
                                v = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
-                                                      ctx->abi->prim_mask, i, j);
+                                                      ctx->abi->prim_mask, i, j, word);
+                               if (fp16)
+                                       v = ac_build_reinterpret(&ctx->ac, v, ctx->ac.f32);
                        } else {
                                v = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, 2, false),
                                                           llvm_chan, attr_number, ctx->abi->prim_mask);
@@ -3134,8 +3141,9 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
                result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, attrib_idx, "");
 
        }
-       return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
-                                             var->data.location_frac);
+       LLVMValueRef ret = ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
+                                                         var->data.location_frac);
+       return ac_build_reinterpret(&ctx->ac, ret, get_def_type(ctx, &instr->dest.ssa));
 }
 
 static void visit_intrinsic(struct ac_nir_context *ctx,
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index c46eabf3656..49f8d35dd5f 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2051,7 +2051,8 @@ static void interp_fs_input(struct radv_shader_context *ctx,
                            unsigned attr,
                            LLVMValueRef interp_param,
                            LLVMValueRef prim_mask,
-                           LLVMValueRef result[4])
+                           LLVMValueRef result[4],
+                           bool fp16)
 {
        LLVMValueRef attr_number;
        unsigned chan;
@@ -2086,7 +2087,10 @@ static void interp_fs_input(struct radv_shader_context *ctx,
                        result[chan] = ac_build_fs_interp(&ctx->ac,
                                                          llvm_chan,
                                                          attr_number,
-                                                         prim_mask, i, j);
+                                                         prim_mask, i, j,
+                                                         fp16 ? 0 : -1);
+                       if (fp16)
+                               result[chan] = ac_build_reinterpret(&ctx->ac, result[chan], ctx->ac.f16);
                } else {
                        result[chan] = ac_build_fs_interp_mov(&ctx->ac,
                                                              LLVMConstInt(ctx->ac.i32, 2, false),
@@ -2100,7 +2104,8 @@ static void interp_fs_input(struct radv_shader_context *ctx,
 
 static void
 handle_fs_input_decl(struct radv_shader_context *ctx,
-                    struct nir_variable *variable)
+                    struct nir_variable *variable,
+                    uint64_t *fp16_mask)
 {
        int idx = variable->data.location;
        unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
@@ -2110,7 +2115,8 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
        variable->data.driver_location = idx * 4;
        mask = ((1ull << attrib_count) - 1) << variable->data.location;
 
-       if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
+       enum glsl_base_type type = glsl_get_base_type(glsl_without_array(variable->type));
+       if (type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_FLOAT16) {
                unsigned interp_type;
                if (variable->data.sample)
                        interp_type = INTERP_SAMPLE;
@@ -2120,6 +2126,9 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
                        interp_type = INTERP_CENTER;
 
                interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
+
+               if (type == GLSL_TYPE_FLOAT16)
+                       *fp16_mask |= mask;
        }
 
        for (unsigned i = 0; i < attrib_count; ++i)
@@ -2173,8 +2182,9 @@ handle_fs_inputs(struct radv_shader_context *ctx,
 {
        prepare_interp_optimize(ctx, nir);
 
+       uint64_t fp16_mask = 0;
        nir_foreach_variable(variable, &nir->inputs)
-               handle_fs_input_decl(ctx, variable);
+               handle_fs_input_decl(ctx, variable, &fp16_mask);
 
        unsigned index = 0;
 
@@ -2194,11 +2204,14 @@ handle_fs_inputs(struct radv_shader_context *ctx,
                if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
                    i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
                        interp_param = *inputs;
+                       bool fp16 = fp16_mask & (1ull << i);
                        interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
-                                       inputs);
+                                       inputs, fp16);
 
                        if (!interp_param)
                                ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
+                       if (fp16)
+                               ctx->shader_info->fs.fp16_mask |= 1u << index;
                        if (i >= VARYING_SLOT_VAR0)
                                ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
                        ++index;
@@ -2210,7 +2223,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
 
                                interp_param = *inputs;
                                interp_fs_input(ctx, index, interp_param,
-                                               ctx->abi.prim_mask, inputs);
+                                               ctx->abi.prim_mask, inputs, false);
                                ++index;
                        }
                } else if (i == VARYING_SLOT_POS) {
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index ab56a273a2c..a3260291bce 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3070,13 +3070,15 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
        radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
 }
 
-static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
+static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool fp16)
 {
        uint32_t ps_input_cntl;
        if (offset <= AC_EXP_PARAM_OFFSET_31) {
                ps_input_cntl = S_028644_OFFSET(offset);
                if (flat_shade)
                        ps_input_cntl |= S_028644_FLAT_SHADE(1);
+               if (fp16 && !flat_shade)
+                       ps_input_cntl |= S_028644_FP16_INTERP_MODE(1);
        } else {
                /* The input is a DEFAULT_VAL constant. */
                assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
@@ -3101,7 +3103,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
        if (ps->info.info.ps.prim_id_input) {
                unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
                        ++ps_offset;
                }
        }
@@ -3111,9 +3113,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
            ps->info.info.needs_multiview_view_index) {
                unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED)
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
                else
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
                ++ps_offset;
        }
 
@@ -3129,21 +3131,21 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
 
                vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
                        ++ps_offset;
                }
 
                vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
                    ps->info.info.ps.num_input_clips_culls > 4) {
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
                        ++ps_offset;
                }
        }
 
        for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
                unsigned vs_offset;
-               bool flat_shade;
+               bool flat_shade, fp16;
                if (!(ps->info.fs.input_mask & (1u << i)))
                        continue;
 
@@ -3155,8 +3157,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
                }
 
                flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
+               fp16 = !!(ps->info.fs.fp16_mask & (1u << ps_offset));
 
-               ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
+               ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, fp16);
                ++ps_offset;
        }
 
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index b67cd2b4f15..f0e9bc249f9 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -257,6 +257,7 @@ struct radv_shader_variant_info {
                        unsigned num_interp;
                        uint32_t input_mask;
                        uint32_t flat_shaded_mask;
+                       uint32_t fp16_mask;
                        bool can_discard;
                        bool early_fragment_test;
                } fs;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index efae02ee91c..c1f82137020 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1751,7 +1751,7 @@ static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
                return ac_build_fs_interp(&ctx->ac,
                                          LLVMConstInt(ctx->i32, chan, 0),
                                          LLVMConstInt(ctx->i32, attr_index, 0),
-                                         prim_mask, i, j);
+                                         prim_mask, i, j, -1);
        }
        return ac_build_fs_interp_mov(&ctx->ac,
                                      LLVMConstInt(ctx->i32, 2, 0), /* P0 */
-- 
2.20.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to