Re: [Mesa-dev] [PATCH 2/2] ac/nir: don't write tcs outputs to LDS that aren't read back.

2017-11-21 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Mon, Nov 20, 2017 at 2:57 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> If the TCS doesn't read back the outputs, no need to store them
> to LDS in the first place. (except for tess factors).
>
> This seems to give about 50fps (3290->3330) with tessellation demo.
>
> I haven't tested if it impacts DoW3 at all.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 17 ++++++++++++++++-
>  1 file changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 1ecdeca..b58bae9 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -149,6 +149,9 @@ struct nir_to_llvm_context {
> unsigned tes_primitive_mode;
> uint64_t tess_outputs_written;
> uint64_t tess_patch_outputs_written;
> +
> +   uint32_t tcs_patch_outputs_read;
> +   uint64_t tcs_outputs_read;
>  };
>
>  static inline struct nir_to_llvm_context *
> @@ -2789,7 +2792,15 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
> const unsigned comp = instr->variables[0]->var->data.location_frac;
> const bool per_vertex = 
> nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
> const bool is_compact = instr->variables[0]->var->data.compact;
> +   bool store_lds = true;
>
> +   if (instr->variables[0]->var->data.patch) {
> +   if (!(ctx->tcs_patch_outputs_read & (1U << 
> instr->variables[0]->var->data.location)))
> +   store_lds = false;
> +   } else {
> +   if (!(ctx->tcs_outputs_read & (1ULL << 
> instr->variables[0]->var->data.location)))
> +   store_lds = false;
> +   }
> get_deref_offset(ctx->nir, instr->variables[0],
>  false, NULL, per_vertex ? &vertex_index : NULL,
>  &const_index, &indir_index);
> @@ -2826,7 +2837,8 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
> continue;
> LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - 
> comp);
>
> -   ac_lds_store(&ctx->ac, dw_addr, value);
> +   if (store_lds || is_tess_factor)
> +   ac_lds_store(&ctx->ac, dw_addr, value);
>
> if (!is_tess_factor && writemask != 0xF)
> ac_build_buffer_store_dword(&ctx->ac, 
> ctx->hs_ring_tess_offchip, value, 1,
> @@ -6550,6 +6562,9 @@ LLVMModuleRef 
> ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
> ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, 
> ctx.ac.i32, "gs_next_vertex");
>
> ctx.gs_max_out_vertices = 
> shaders[i]->info.gs.vertices_out;
> +   } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
> +   ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
> +   ctx.tcs_patch_outputs_read = 
> shaders[i]->info.patch_outputs_read;
> } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
> ctx.tes_primitive_mode = 
> shaders[i]->info.tess.primitive_mode;
> } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
> --
> 2.9.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] ac/nir: don't write tcs outputs to LDS that aren't read back.

2017-11-19 Thread Dave Airlie
From: Dave Airlie 

If the TCS doesn't read back the outputs, no need to store them
to LDS in the first place. (except for tess factors).

This seems to give about 50fps (3290->3330) with tessellation demo.

I haven't tested if it impacts DoW3 at all.

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 1ecdeca..b58bae9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -149,6 +149,9 @@ struct nir_to_llvm_context {
unsigned tes_primitive_mode;
uint64_t tess_outputs_written;
uint64_t tess_patch_outputs_written;
+
+   uint32_t tcs_patch_outputs_read;
+   uint64_t tcs_outputs_read;
 };
 
 static inline struct nir_to_llvm_context *
@@ -2789,7 +2792,15 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
const unsigned comp = instr->variables[0]->var->data.location_frac;
const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, 
ctx->stage);
const bool is_compact = instr->variables[0]->var->data.compact;
+   bool store_lds = true;
 
+   if (instr->variables[0]->var->data.patch) {
+   if (!(ctx->tcs_patch_outputs_read & (1U << 
instr->variables[0]->var->data.location)))
+   store_lds = false;
+   } else {
+   if (!(ctx->tcs_outputs_read & (1ULL << 
instr->variables[0]->var->data.location)))
+   store_lds = false;
+   }
get_deref_offset(ctx->nir, instr->variables[0],
 false, NULL, per_vertex ? &vertex_index : NULL,
 &const_index, &indir_index);
@@ -2826,7 +2837,8 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
continue;
LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - 
comp);
 
-   ac_lds_store(&ctx->ac, dw_addr, value);
+   if (store_lds || is_tess_factor)
+   ac_lds_store(&ctx->ac, dw_addr, value);
 
if (!is_tess_factor && writemask != 0xF)
ac_build_buffer_store_dword(&ctx->ac, 
ctx->hs_ring_tess_offchip, value, 1,
@@ -6550,6 +6562,9 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, 
ctx.ac.i32, "gs_next_vertex");
 
ctx.gs_max_out_vertices = 
shaders[i]->info.gs.vertices_out;
+   } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+   ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
+   ctx.tcs_patch_outputs_read = 
shaders[i]->info.patch_outputs_read;
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
ctx.tes_primitive_mode = 
shaders[i]->info.tess.primitive_mode;
} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] ac/nir: don't write tcs outputs to LDS that aren't read back.

2017-11-15 Thread Nicolai Hähnle

On 14.11.2017 06:18, Dave Airlie wrote:

From: Dave Airlie 

If the TCS doesn't read back the outputs, no need to store them
to LDS in the first place. (except for tess factors).

This seems to give about 50fps (3290->3330) with tessellation demo.

I haven't tested if it impacts DoW3 at all.

Signed-off-by: Dave Airlie 


Reviewed-by: Nicolai Hähnle 


---
  src/amd/common/ac_nir_to_llvm.c | 17 ++++++++++++++++-
  1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3d9f613..e7133ec 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -150,6 +150,9 @@ struct nir_to_llvm_context {
unsigned tes_primitive_mode;
uint64_t tess_outputs_written;
uint64_t tess_patch_outputs_written;
+
+   uint32_t tcs_patch_outputs_read;
+   uint64_t tcs_outputs_read;
  };
  
  static inline struct nir_to_llvm_context *

@@ -2790,7 +2793,15 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
const unsigned comp = instr->variables[0]->var->data.location_frac;
const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, 
ctx->stage);
const bool is_compact = instr->variables[0]->var->data.compact;
+   bool store_lds = true;
  
+	if (instr->variables[0]->var->data.patch) {

+   if (!(ctx->tcs_patch_outputs_read & (1U << 
instr->variables[0]->var->data.location)))
+   store_lds = false;
+   } else {
+   if (!(ctx->tcs_outputs_read & (1ULL << 
instr->variables[0]->var->data.location)))
+   store_lds = false;
+   }
get_deref_offset(ctx->nir, instr->variables[0],
 false, NULL, per_vertex ? &vertex_index : NULL,
 &const_index, &indir_index);
@@ -2827,7 +2838,8 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
continue;
LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - 
comp);
  
-		ac_lds_store(&ctx->ac, dw_addr, value);

+   if (store_lds || is_tess_factor)
+   ac_lds_store(&ctx->ac, dw_addr, value);
  
  		if (!is_tess_factor && writemask != 0xF)

ac_build_buffer_store_dword(&ctx->ac, 
ctx->hs_ring_tess_offchip, value, 1,
@@ -6550,6 +6562,9 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, 
"gs_next_vertex");
  
  			ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out;

+   } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+   ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
+   ctx.tcs_patch_outputs_read = 
shaders[i]->info.patch_outputs_read;
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
ctx.tes_primitive_mode = 
shaders[i]->info.tess.primitive_mode;
} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] ac/nir: don't write tcs outputs to LDS that aren't read back.

2017-11-13 Thread Dave Airlie
From: Dave Airlie 

If the TCS doesn't read back the outputs, no need to store them
to LDS in the first place. (except for tess factors).

This seems to give about 50fps (3290->3330) with tessellation demo.

I haven't tested if it impacts DoW3 at all.

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3d9f613..e7133ec 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -150,6 +150,9 @@ struct nir_to_llvm_context {
unsigned tes_primitive_mode;
uint64_t tess_outputs_written;
uint64_t tess_patch_outputs_written;
+
+   uint32_t tcs_patch_outputs_read;
+   uint64_t tcs_outputs_read;
 };
 
 static inline struct nir_to_llvm_context *
@@ -2790,7 +2793,15 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
const unsigned comp = instr->variables[0]->var->data.location_frac;
const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, 
ctx->stage);
const bool is_compact = instr->variables[0]->var->data.compact;
+   bool store_lds = true;
 
+   if (instr->variables[0]->var->data.patch) {
+   if (!(ctx->tcs_patch_outputs_read & (1U << 
instr->variables[0]->var->data.location)))
+   store_lds = false;
+   } else {
+   if (!(ctx->tcs_outputs_read & (1ULL << 
instr->variables[0]->var->data.location)))
+   store_lds = false;
+   }
get_deref_offset(ctx->nir, instr->variables[0],
 false, NULL, per_vertex ? &vertex_index : NULL,
 &const_index, &indir_index);
@@ -2827,7 +2838,8 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
continue;
LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - 
comp);
 
-   ac_lds_store(&ctx->ac, dw_addr, value);
+   if (store_lds || is_tess_factor)
+   ac_lds_store(&ctx->ac, dw_addr, value);
 
if (!is_tess_factor && writemask != 0xF)
ac_build_buffer_store_dword(&ctx->ac, 
ctx->hs_ring_tess_offchip, value, 1,
@@ -6550,6 +6562,9 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, 
ctx.ac.i32, "gs_next_vertex");
 
ctx.gs_max_out_vertices = 
shaders[i]->info.gs.vertices_out;
+   } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+   ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
+   ctx.tcs_patch_outputs_read = 
shaders[i]->info.patch_outputs_read;
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
ctx.tes_primitive_mode = 
shaders[i]->info.tess.primitive_mode;
} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev