Re: [Mesa-dev] [PATCH 4/4] radeonsi: emit PS exports last

2016-07-19 Thread Nicolai Hähnle

On 19.07.2016 18:32, Marek Olšák wrote:

On Tue, Jul 19, 2016 at 3:43 PM, Nicolai Hähnle  wrote:

Patches 1, 3 & 4 are

Reviewed-by: Nicolai Hähnle 


Why not patch 2?


That was me being thoroughly confused today. 2 is 3 to a nearest 
approximation or something like that...


Anyway, series is

Reviewed-by: Nicolai Hähnle 



Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] radeonsi: emit PS exports last

2016-07-19 Thread Marek Olšák
On Tue, Jul 19, 2016 at 3:43 PM, Nicolai Hähnle  wrote:
> Patches 1, 3 & 4 are
>
> Reviewed-by: Nicolai Hähnle 

Why not patch 2?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] radeonsi: emit PS exports last

2016-07-19 Thread Nicolai Hähnle

Patches 1, 3 & 4 are

Reviewed-by: Nicolai Hähnle 

On 18.07.2016 14:14, Marek Olšák wrote:

From: Marek Olšák 

This effectively removes s_waitcnt instructions after FP16 exports.

Before:

 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1   ; 5E000300
 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3   ; 5E020702
 exp 15, 0, 1, 0, 0, v0, v1, v0, v0   ; F800040F 0100
 s_waitcnt expcnt(0)  ; BF8C0F0F
 v_cvt_pkrtz_f16_f32_e32 v0, v4, v5   ; 5E000B04
 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7   ; 5E020F06
 exp 15, 1, 1, 0, 0, v0, v1, v0, v0   ; F800041F 0100
 s_waitcnt expcnt(0)  ; BF8C0F0F
 v_cvt_pkrtz_f16_f32_e32 v0, v8, v9   ; 5E001308
 v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A
 exp 15, 2, 1, 0, 0, v0, v1, v0, v0   ; F800042F 0100
 s_waitcnt expcnt(0)  ; BF8C0F0F
 v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C
 v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E
 exp 15, 3, 1, 1, 1, v0, v1, v0, v0   ; F8001C3F 0100
 s_endpgm ; BF81

After:

 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1   ; 5E000300
 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3   ; 5E020702
 v_cvt_pkrtz_f16_f32_e32 v2, v4, v5   ; 5E040B04
 v_cvt_pkrtz_f16_f32_e32 v3, v6, v7   ; 5E060F06
 exp 15, 0, 1, 0, 0, v0, v1, v0, v0   ; F800040F 0100
 v_cvt_pkrtz_f16_f32_e32 v4, v8, v9   ; 5E081308
 v_cvt_pkrtz_f16_f32_e32 v5, v10, v11 ; 5E0A170A
 exp 15, 1, 1, 0, 0, v2, v3, v0, v0   ; F800041F 0302
 v_cvt_pkrtz_f16_f32_e32 v6, v12, v13 ; 5E0C1B0C
 v_cvt_pkrtz_f16_f32_e32 v7, v14, v15 ; 5E0E1F0E
 exp 15, 2, 1, 0, 0, v4, v5, v0, v0   ; F800042F 0504
 exp 15, 3, 1, 1, 1, v6, v7, v0, v0   ; F8001C3F 0706
 s_endpgm ; BF81
---
  src/gallium/drivers/radeonsi/si_shader.c | 44 ++--
  1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 41bcbd4..adf706c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2917,9 +2917,14 @@ static void si_llvm_emit_vs_epilogue(struct 
lp_build_tgsi_context *bld_base)
FREE(outputs);
  }

+struct si_ps_exports {
+   unsigned num;
+   LLVMValueRef args[10][9];
+};
+
  static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
-  LLVMValueRef depth, LLVMValueRef stencil,
-  LLVMValueRef samplemask)
+   LLVMValueRef depth, LLVMValueRef stencil,
+   LLVMValueRef samplemask, struct si_ps_exports *exp)
  {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *base = &bld_base->base;
@@ -2965,14 +2970,13 @@ static void si_export_mrt_z(struct 
lp_build_tgsi_context *bld_base,
/* Specify which components to enable */
args[0] = lp_build_const_int32(base->gallivm, mask);

-   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-  ctx->voidt, args, 9, 0);
+   memcpy(exp->args[exp->num++], args, sizeof(args));
  }

  static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
LLVMValueRef *color, unsigned index,
unsigned samplemask_param,
-   bool is_last)
+   bool is_last, struct si_ps_exports *exp)
  {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *base = &bld_base->base;
@@ -3018,8 +3022,7 @@ static void si_export_mrt_color(struct 
lp_build_tgsi_context *bld_base,
} else if (args[c][0] == bld_base->uint_bld.zero)
continue; /* unnecessary NULL export */

-   lp_build_intrinsic(base->gallivm->builder, 
"llvm.SI.export",
-  ctx->voidt, args[c], 9, 0);
+   memcpy(exp->args[exp->num++], args[c], sizeof(args[c]));
}
} else {
LLVMValueRef args[9];
@@ -3033,11 +3036,19 @@ static void si_export_mrt_color(struct 
lp_build_tgsi_context *bld_base,
} else if (args[0] == bld_base->uint_bld.zero)
return; /* unnecessary NULL export */

-   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-  ctx->voidt, args, 9, 0);
+   memcpy(exp->args[exp->num++], args, sizeof(args));
}
  }

+static void si_emit_ps_exports(struct si_shader_context *ctx,
+  struct si_ps_exports *exp)
+{
+   for (unsigned i = 0; i < exp->num; i++)
+   lp_build_intrinsic(ctx->radeon_bld.gallivm.builder,
+ 

[Mesa-dev] [PATCH 4/4] radeonsi: emit PS exports last

2016-07-18 Thread Marek Olšák
From: Marek Olšák 

This effectively removes s_waitcnt instructions after FP16 exports.

Before:

v_cvt_pkrtz_f16_f32_e32 v0, v0, v1   ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3   ; 5E020702
exp 15, 0, 1, 0, 0, v0, v1, v0, v0   ; F800040F 0100
s_waitcnt expcnt(0)  ; BF8C0F0F
v_cvt_pkrtz_f16_f32_e32 v0, v4, v5   ; 5E000B04
v_cvt_pkrtz_f16_f32_e32 v1, v6, v7   ; 5E020F06
exp 15, 1, 1, 0, 0, v0, v1, v0, v0   ; F800041F 0100
s_waitcnt expcnt(0)  ; BF8C0F0F
v_cvt_pkrtz_f16_f32_e32 v0, v8, v9   ; 5E001308
v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A
exp 15, 2, 1, 0, 0, v0, v1, v0, v0   ; F800042F 0100
s_waitcnt expcnt(0)  ; BF8C0F0F
v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C
v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E
exp 15, 3, 1, 1, 1, v0, v1, v0, v0   ; F8001C3F 0100
s_endpgm ; BF81

After:

v_cvt_pkrtz_f16_f32_e32 v0, v0, v1   ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3   ; 5E020702
v_cvt_pkrtz_f16_f32_e32 v2, v4, v5   ; 5E040B04
v_cvt_pkrtz_f16_f32_e32 v3, v6, v7   ; 5E060F06
exp 15, 0, 1, 0, 0, v0, v1, v0, v0   ; F800040F 0100
v_cvt_pkrtz_f16_f32_e32 v4, v8, v9   ; 5E081308
v_cvt_pkrtz_f16_f32_e32 v5, v10, v11 ; 5E0A170A
exp 15, 1, 1, 0, 0, v2, v3, v0, v0   ; F800041F 0302
v_cvt_pkrtz_f16_f32_e32 v6, v12, v13 ; 5E0C1B0C
v_cvt_pkrtz_f16_f32_e32 v7, v14, v15 ; 5E0E1F0E
exp 15, 2, 1, 0, 0, v4, v5, v0, v0   ; F800042F 0504
exp 15, 3, 1, 1, 1, v6, v7, v0, v0   ; F8001C3F 0706
s_endpgm ; BF81
---
 src/gallium/drivers/radeonsi/si_shader.c | 44 ++--
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 41bcbd4..adf706c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2917,9 +2917,14 @@ static void si_llvm_emit_vs_epilogue(struct 
lp_build_tgsi_context *bld_base)
FREE(outputs);
 }
 
+struct si_ps_exports {
+   unsigned num;
+   LLVMValueRef args[10][9];
+};
+
 static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
-  LLVMValueRef depth, LLVMValueRef stencil,
-  LLVMValueRef samplemask)
+   LLVMValueRef depth, LLVMValueRef stencil,
+   LLVMValueRef samplemask, struct si_ps_exports *exp)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *base = &bld_base->base;
@@ -2965,14 +2970,13 @@ static void si_export_mrt_z(struct 
lp_build_tgsi_context *bld_base,
/* Specify which components to enable */
args[0] = lp_build_const_int32(base->gallivm, mask);
 
-   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-  ctx->voidt, args, 9, 0);
+   memcpy(exp->args[exp->num++], args, sizeof(args));
 }
 
 static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
LLVMValueRef *color, unsigned index,
unsigned samplemask_param,
-   bool is_last)
+   bool is_last, struct si_ps_exports *exp)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *base = &bld_base->base;
@@ -3018,8 +3022,7 @@ static void si_export_mrt_color(struct 
lp_build_tgsi_context *bld_base,
} else if (args[c][0] == bld_base->uint_bld.zero)
continue; /* unnecessary NULL export */
 
-   lp_build_intrinsic(base->gallivm->builder, 
"llvm.SI.export",
-  ctx->voidt, args[c], 9, 0);
+   memcpy(exp->args[exp->num++], args[c], sizeof(args[c]));
}
} else {
LLVMValueRef args[9];
@@ -3033,11 +3036,19 @@ static void si_export_mrt_color(struct 
lp_build_tgsi_context *bld_base,
} else if (args[0] == bld_base->uint_bld.zero)
return; /* unnecessary NULL export */
 
-   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-  ctx->voidt, args, 9, 0);
+   memcpy(exp->args[exp->num++], args, sizeof(args));
}
 }
 
+static void si_emit_ps_exports(struct si_shader_context *ctx,
+  struct si_ps_exports *exp)
+{
+   for (unsigned i = 0; i < exp->num; i++)
+   lp_build_intrinsic(ctx->radeon_bld.gallivm.builder,
+  "llvm.SI.export", ctx->voidt,
+  exp->args[i], 9, 0);
+}
+
 static void si_export_null(struct