Re: [Mesa-dev] [PATCH 4/4] radeonsi: emit PS exports last
On 19.07.2016 18:32, Marek Olšák wrote: On Tue, Jul 19, 2016 at 3:43 PM, Nicolai Hähnle wrote: Patches 1, 3 & 4 are Reviewed-by: Nicolai Hähnle Why not patch 2? That was me being thoroughly confused today. 2 is 3 to a nearest approximation or something like that... Anyway, series is Reviewed-by: Nicolai Hähnle Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/4] radeonsi: emit PS exports last
On Tue, Jul 19, 2016 at 3:43 PM, Nicolai Hähnle wrote: > Patches 1, 3 & 4 are > > Reviewed-by: Nicolai Hähnle Why not patch 2? Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/4] radeonsi: emit PS exports last
Patches 1, 3 & 4 are Reviewed-by: Nicolai Hähnle On 18.07.2016 14:14, Marek Olšák wrote: From: Marek Olšák This effectively removes s_waitcnt instructions after FP16 exports. Before: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 0100 s_waitcnt expcnt(0) ; BF8C0F0F v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 1, 1, 0, 0, v0, v1, v0, v0 ; F800041F 0100 s_waitcnt expcnt(0) ; BF8C0F0F v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308 v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A exp 15, 2, 1, 0, 0, v0, v1, v0, v0 ; F800042F 0100 s_waitcnt expcnt(0) ; BF8C0F0F v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E exp 15, 3, 1, 1, 1, v0, v1, v0, v0 ; F8001C3F 0100 s_endpgm ; BF81 After: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v2, v4, v5 ; 5E040B04 v_cvt_pkrtz_f16_f32_e32 v3, v6, v7 ; 5E060F06 exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 0100 v_cvt_pkrtz_f16_f32_e32 v4, v8, v9 ; 5E081308 v_cvt_pkrtz_f16_f32_e32 v5, v10, v11 ; 5E0A170A exp 15, 1, 1, 0, 0, v2, v3, v0, v0 ; F800041F 0302 v_cvt_pkrtz_f16_f32_e32 v6, v12, v13 ; 5E0C1B0C v_cvt_pkrtz_f16_f32_e32 v7, v14, v15 ; 5E0E1F0E exp 15, 2, 1, 0, 0, v4, v5, v0, v0 ; F800042F 0504 exp 15, 3, 1, 1, 1, v6, v7, v0, v0 ; F8001C3F 0706 s_endpgm ; BF81 --- src/gallium/drivers/radeonsi/si_shader.c | 44 ++-- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 41bcbd4..adf706c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2917,9 +2917,14 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) FREE(outputs); } +struct si_ps_exports { + unsigned num; + LLVMValueRef args[10][9]; +}; + static void 
si_export_mrt_z(struct lp_build_tgsi_context *bld_base, - LLVMValueRef depth, LLVMValueRef stencil, - LLVMValueRef samplemask) + LLVMValueRef depth, LLVMValueRef stencil, + LLVMValueRef samplemask, struct si_ps_exports *exp) { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; @@ -2965,14 +2970,13 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, /* Specify which components to enable */ args[0] = lp_build_const_int32(base->gallivm, mask); - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, args, 9, 0); + memcpy(exp->args[exp->num++], args, sizeof(args)); } static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, LLVMValueRef *color, unsigned index, unsigned samplemask_param, - bool is_last) + bool is_last, struct si_ps_exports *exp) { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; @@ -3018,8 +3022,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, } else if (args[c][0] == bld_base->uint_bld.zero) continue; /* unnecessary NULL export */ - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, args[c], 9, 0); + memcpy(exp->args[exp->num++], args[c], sizeof(args[c])); } } else { LLVMValueRef args[9]; @@ -3033,11 +3036,19 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, } else if (args[0] == bld_base->uint_bld.zero) return; /* unnecessary NULL export */ - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, args, 9, 0); + memcpy(exp->args[exp->num++], args, sizeof(args)); } } +static void si_emit_ps_exports(struct si_shader_context *ctx, + struct si_ps_exports *exp) +{ + for (unsigned i = 0; i < exp->num; i++) + lp_build_intrinsic(ctx->radeon_bld.gallivm.builder, +
[Mesa-dev] [PATCH 4/4] radeonsi: emit PS exports last
From: Marek Olšák This effectively removes s_waitcnt instructions after FP16 exports. Before: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 0100 s_waitcnt expcnt(0) ; BF8C0F0F v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 1, 1, 0, 0, v0, v1, v0, v0 ; F800041F 0100 s_waitcnt expcnt(0) ; BF8C0F0F v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308 v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A exp 15, 2, 1, 0, 0, v0, v1, v0, v0 ; F800042F 0100 s_waitcnt expcnt(0) ; BF8C0F0F v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E exp 15, 3, 1, 1, 1, v0, v1, v0, v0 ; F8001C3F 0100 s_endpgm ; BF81 After: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v2, v4, v5 ; 5E040B04 v_cvt_pkrtz_f16_f32_e32 v3, v6, v7 ; 5E060F06 exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 0100 v_cvt_pkrtz_f16_f32_e32 v4, v8, v9 ; 5E081308 v_cvt_pkrtz_f16_f32_e32 v5, v10, v11 ; 5E0A170A exp 15, 1, 1, 0, 0, v2, v3, v0, v0 ; F800041F 0302 v_cvt_pkrtz_f16_f32_e32 v6, v12, v13 ; 5E0C1B0C v_cvt_pkrtz_f16_f32_e32 v7, v14, v15 ; 5E0E1F0E exp 15, 2, 1, 0, 0, v4, v5, v0, v0 ; F800042F 0504 exp 15, 3, 1, 1, 1, v6, v7, v0, v0 ; F8001C3F 0706 s_endpgm ; BF81 --- src/gallium/drivers/radeonsi/si_shader.c | 44 ++-- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 41bcbd4..adf706c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2917,9 +2917,14 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) FREE(outputs); } +struct si_ps_exports { + unsigned num; + LLVMValueRef args[10][9]; +}; + static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, - LLVMValueRef depth, 
LLVMValueRef stencil, - LLVMValueRef samplemask) + LLVMValueRef depth, LLVMValueRef stencil, + LLVMValueRef samplemask, struct si_ps_exports *exp) { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; @@ -2965,14 +2970,13 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, /* Specify which components to enable */ args[0] = lp_build_const_int32(base->gallivm, mask); - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, args, 9, 0); + memcpy(exp->args[exp->num++], args, sizeof(args)); } static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, LLVMValueRef *color, unsigned index, unsigned samplemask_param, - bool is_last) + bool is_last, struct si_ps_exports *exp) { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; @@ -3018,8 +3022,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, } else if (args[c][0] == bld_base->uint_bld.zero) continue; /* unnecessary NULL export */ - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, args[c], 9, 0); + memcpy(exp->args[exp->num++], args[c], sizeof(args[c])); } } else { LLVMValueRef args[9]; @@ -3033,11 +3036,19 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, } else if (args[0] == bld_base->uint_bld.zero) return; /* unnecessary NULL export */ - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, args, 9, 0); + memcpy(exp->args[exp->num++], args, sizeof(args)); } } +static void si_emit_ps_exports(struct si_shader_context *ctx, + struct si_ps_exports *exp) +{ + for (unsigned i = 0; i < exp->num; i++) + lp_build_intrinsic(ctx->radeon_bld.gallivm.builder, + "llvm.SI.export", ctx->voidt, + exp->args[i], 9, 0); +} + static void si_export_null(struct