Re: [Mesa-dev] [PATCH] ac/nir: rewrite shared variable handling (v2)

2017-07-17 Thread Connor Abbott
Pushed!

On Thu, Jul 13, 2017 at 3:39 PM, Connor Abbott wrote:
> From: Connor Abbott 
>
> Translate the NIR variables directly to LLVM instead of lowering to a
> TGSI-style giant array of vec4's and then back to a variable. This
> should fix indirect dereferences, make shared variables more tightly
> packed, and make LLVM's alias analysis more precise. This should fix an
> upcoming Feral title, which has a compute shader that was failing to
> compile because the extra padding made us run out of LDS space.
>
> v2: Combine the previous two patches into one, only use this for shared
> variables for now until LLVM becomes smarter.
>
> Cc: Alex Smith 
> Reviewed-by: Bas Nieuwenhuizen 
> ---
> Alex: I made this by squashing together two patches from my series, so it
> should still work, but I'd still like to get your Tested-by before I push to
> make sure I didn't screw something up.
>
>  src/amd/common/ac_nir_to_llvm.c | 245 
> ++--
>  1 file changed, 158 insertions(+), 87 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 9223310..9a69066 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -65,6 +65,7 @@ struct nir_to_llvm_context {
>
> struct hash_table *defs;
> struct hash_table *phis;
> +   struct hash_table *vars;
>
> LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
> LLVMValueRef ring_offsets;
> @@ -154,7 +155,6 @@ struct nir_to_llvm_context {
> LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
> LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
>
> -   LLVMValueRef shared_memory;
> uint64_t input_mask;
> uint64_t output_mask;
> int num_locals;
> @@ -387,23 +387,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, 
> int num_elements)
>CONST_ADDR_SPACE);
>  }
>
> -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
> - int idx,
> - LLVMTypeRef type)
> -{
> -   LLVMValueRef offset;
> -   LLVMValueRef ptr;
> -   int addr_space;
> -
> -   offset = LLVMConstInt(ctx->i32, idx * 16, false);
> -
> -   ptr = ctx->shared_memory;
> -   ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
> -   addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
> -   ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, 
> addr_space), "");
> -   return ptr;
> -}
> -
>  static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, 
> LLVMTypeRef t)
>  {
> if (t == ctx->f16 || t == ctx->i16)
> @@ -2905,6 +2888,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
> return result;
>  }
>
> +static LLVMValueRef
> +build_gep_for_deref(struct nir_to_llvm_context *ctx,
> +   nir_deref_var *deref)
> +{
> +   struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, 
> deref->var);
> +   assert(entry->data);
> +   LLVMValueRef val = entry->data;
> +   nir_deref *tail = deref->deref.child;
> +   while (tail != NULL) {
> +   LLVMValueRef offset;
> +   switch (tail->deref_type) {
> +   case nir_deref_type_array: {
> +   nir_deref_array *array = nir_deref_as_array(tail);
> +   offset = LLVMConstInt(ctx->i32, array->base_offset, 
> 0);
> +   if (array->deref_array_type ==
> +   nir_deref_array_type_indirect) {
> +   offset = LLVMBuildAdd(ctx->builder, offset,
> + get_src(ctx,
> + 
> array->indirect),
> + "");
> +   }
> +   break;
> +   }
> +   case nir_deref_type_struct: {
> +   nir_deref_struct *deref_struct =
> +   nir_deref_as_struct(tail);
> +   offset = LLVMConstInt(ctx->i32,
> + deref_struct->index, 0);
> +   break;
> +   }
> +   default:
> +   unreachable("bad deref type");
> +   }
> +   val = ac_build_gep0(&ctx->ac, val, offset);
> +   tail = tail->child;
> +   }
> +   return val;
> +}
> +
>  static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>nir_intrinsic_instr *instr)
>  {
> @@ -2966,6 +2988,14 @@ static LLVMValueRef visit_load_var(struct 
> nir_to_llvm_context *ctx,
> }
> }
> break;
> +   case 

Re: [Mesa-dev] [PATCH] ac/nir: rewrite shared variable handling (v2)

2017-07-16 Thread Nicolai Hähnle

On 14.07.2017 00:39, Connor Abbott wrote:

From: Connor Abbott 

Translate the NIR variables directly to LLVM instead of lowering to a
TGSI-style giant array of vec4's and then back to a variable. This
should fix indirect dereferences, make shared variables more tightly
packed, and make LLVM's alias analysis more precise. This should fix an
upcoming Feral title, which has a compute shader that was failing to
compile because the extra padding made us run out of LDS space.

v2: Combine the previous two patches into one, only use this for shared
variables for now until LLVM becomes smarter.

Cc: Alex Smith 
Reviewed-by: Bas Nieuwenhuizen 


Reviewed-by: Nicolai Hähnle 



---
Alex: I made this by squashing together two patches from my series, so it
should still work, but I'd still like to get your Tested-by before I push to
make sure I didn't screw something up.

  src/amd/common/ac_nir_to_llvm.c | 245 ++--
  1 file changed, 158 insertions(+), 87 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9223310..9a69066 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -65,6 +65,7 @@ struct nir_to_llvm_context {
  
  	struct hash_table *defs;

struct hash_table *phis;
+   struct hash_table *vars;
  
  	LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];

LLVMValueRef ring_offsets;
@@ -154,7 +155,6 @@ struct nir_to_llvm_context {
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
  
-	LLVMValueRef shared_memory;

uint64_t input_mask;
uint64_t output_mask;
int num_locals;
@@ -387,23 +387,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int 
num_elements)
   CONST_ADDR_SPACE);
  }
  
-static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,

- int idx,
- LLVMTypeRef type)
-{
-   LLVMValueRef offset;
-   LLVMValueRef ptr;
-   int addr_space;
-
-   offset = LLVMConstInt(ctx->i32, idx * 16, false);
-
-   ptr = ctx->shared_memory;
-   ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
-   addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
-   ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), 
"");
-   return ptr;
-}
-
  static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, 
LLVMTypeRef t)
  {
if (t == ctx->f16 || t == ctx->i16)
@@ -2905,6 +2888,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
return result;
  }
  
+static LLVMValueRef

+build_gep_for_deref(struct nir_to_llvm_context *ctx,
+   nir_deref_var *deref)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, 
deref->var);
+   assert(entry->data);
+   LLVMValueRef val = entry->data;
+   nir_deref *tail = deref->deref.child;
+   while (tail != NULL) {
+   LLVMValueRef offset;
+   switch (tail->deref_type) {
+   case nir_deref_type_array: {
+   nir_deref_array *array = nir_deref_as_array(tail);
+   offset = LLVMConstInt(ctx->i32, array->base_offset, 0);
+   if (array->deref_array_type ==
+   nir_deref_array_type_indirect) {
+   offset = LLVMBuildAdd(ctx->builder, offset,
+ get_src(ctx,
+ array->indirect),
+ "");
+   }
+   break;
+   }
+   case nir_deref_type_struct: {
+   nir_deref_struct *deref_struct =
+   nir_deref_as_struct(tail);
+   offset = LLVMConstInt(ctx->i32,
+ deref_struct->index, 0);
+   break;
+   }
+   default:
+   unreachable("bad deref type");
+   }
+   val = ac_build_gep0(&ctx->ac, val, offset);
+   tail = tail->child;
+   }
+   return val;
+}
+
  static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
   nir_intrinsic_instr *instr)
  {
@@ -2966,6 +2988,14 @@ static LLVMValueRef visit_load_var(struct 
nir_to_llvm_context *ctx,
}
}
break;
+   case nir_var_shared: {
+   LLVMValueRef address = build_gep_for_deref(ctx,
+  instr->variables[0]);
+   LLVMValueRef val = 

Re: [Mesa-dev] [PATCH] ac/nir: rewrite shared variable handling (v2)

2017-07-14 Thread Alex Smith
On 13 July 2017 at 23:39, Connor Abbott wrote:
> From: Connor Abbott 
>
> Translate the NIR variables directly to LLVM instead of lowering to a
> TGSI-style giant array of vec4's and then back to a variable. This
> should fix indirect dereferences, make shared variables more tightly
> packed, and make LLVM's alias analysis more precise. This should fix an
> upcoming Feral title, which has a compute shader that was failing to
> compile because the extra padding made us run out of LDS space.
>
> v2: Combine the previous two patches into one, only use this for shared
> variables for now until LLVM becomes smarter.
>
> Cc: Alex Smith 
> Reviewed-by: Bas Nieuwenhuizen 
> ---
> Alex: I made this by squashing together two patches from my series, so it
> should still work, but I'd still like to get your Tested-by before I push to
> make sure I didn't screw something up.

Looks good to me, thanks!

Tested-by: Alex Smith 

>
>  src/amd/common/ac_nir_to_llvm.c | 245 
> ++--
>  1 file changed, 158 insertions(+), 87 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 9223310..9a69066 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -65,6 +65,7 @@ struct nir_to_llvm_context {
>
> struct hash_table *defs;
> struct hash_table *phis;
> +   struct hash_table *vars;
>
> LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
> LLVMValueRef ring_offsets;
> @@ -154,7 +155,6 @@ struct nir_to_llvm_context {
> LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
> LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
>
> -   LLVMValueRef shared_memory;
> uint64_t input_mask;
> uint64_t output_mask;
> int num_locals;
> @@ -387,23 +387,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, 
> int num_elements)
>CONST_ADDR_SPACE);
>  }
>
> -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
> - int idx,
> - LLVMTypeRef type)
> -{
> -   LLVMValueRef offset;
> -   LLVMValueRef ptr;
> -   int addr_space;
> -
> -   offset = LLVMConstInt(ctx->i32, idx * 16, false);
> -
> -   ptr = ctx->shared_memory;
> -   ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
> -   addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
> -   ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, 
> addr_space), "");
> -   return ptr;
> -}
> -
>  static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, 
> LLVMTypeRef t)
>  {
> if (t == ctx->f16 || t == ctx->i16)
> @@ -2905,6 +2888,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
> return result;
>  }
>
> +static LLVMValueRef
> +build_gep_for_deref(struct nir_to_llvm_context *ctx,
> +   nir_deref_var *deref)
> +{
> +   struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, 
> deref->var);
> +   assert(entry->data);
> +   LLVMValueRef val = entry->data;
> +   nir_deref *tail = deref->deref.child;
> +   while (tail != NULL) {
> +   LLVMValueRef offset;
> +   switch (tail->deref_type) {
> +   case nir_deref_type_array: {
> +   nir_deref_array *array = nir_deref_as_array(tail);
> +   offset = LLVMConstInt(ctx->i32, array->base_offset, 
> 0);
> +   if (array->deref_array_type ==
> +   nir_deref_array_type_indirect) {
> +   offset = LLVMBuildAdd(ctx->builder, offset,
> + get_src(ctx,
> + 
> array->indirect),
> + "");
> +   }
> +   break;
> +   }
> +   case nir_deref_type_struct: {
> +   nir_deref_struct *deref_struct =
> +   nir_deref_as_struct(tail);
> +   offset = LLVMConstInt(ctx->i32,
> + deref_struct->index, 0);
> +   break;
> +   }
> +   default:
> +   unreachable("bad deref type");
> +   }
> +   val = ac_build_gep0(&ctx->ac, val, offset);
> +   tail = tail->child;
> +   }
> +   return val;
> +}
> +
>  static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>nir_intrinsic_instr *instr)
>  {
> @@ -2966,6 +2988,14 @@ static LLVMValueRef visit_load_var(struct 
> nir_to_llvm_context *ctx,
>   

[Mesa-dev] [PATCH] ac/nir: rewrite shared variable handling (v2)

2017-07-13 Thread Connor Abbott
From: Connor Abbott 

Translate the NIR variables directly to LLVM instead of lowering to a
TGSI-style giant array of vec4's and then back to a variable. This
should fix indirect dereferences, make shared variables more tightly
packed, and make LLVM's alias analysis more precise. This should fix an
upcoming Feral title, which has a compute shader that was failing to
compile because the extra padding made us run out of LDS space.

v2: Combine the previous two patches into one, only use this for shared
variables for now until LLVM becomes smarter.

Cc: Alex Smith 
Reviewed-by: Bas Nieuwenhuizen 
---
Alex: I made this by squashing together two patches from my series, so it
should still work, but I'd still like to get your Tested-by before I push to
make sure I didn't screw something up.

 src/amd/common/ac_nir_to_llvm.c | 245 ++--
 1 file changed, 158 insertions(+), 87 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9223310..9a69066 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -65,6 +65,7 @@ struct nir_to_llvm_context {
 
struct hash_table *defs;
struct hash_table *phis;
+   struct hash_table *vars;
 
LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
LLVMValueRef ring_offsets;
@@ -154,7 +155,6 @@ struct nir_to_llvm_context {
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
 
-   LLVMValueRef shared_memory;
uint64_t input_mask;
uint64_t output_mask;
int num_locals;
@@ -387,23 +387,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int 
num_elements)
   CONST_ADDR_SPACE);
 }
 
-static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
- int idx,
- LLVMTypeRef type)
-{
-   LLVMValueRef offset;
-   LLVMValueRef ptr;
-   int addr_space;
-
-   offset = LLVMConstInt(ctx->i32, idx * 16, false);
-
-   ptr = ctx->shared_memory;
-   ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
-   addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
-   ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, 
addr_space), "");
-   return ptr;
-}
-
 static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, 
LLVMTypeRef t)
 {
if (t == ctx->f16 || t == ctx->i16)
@@ -2905,6 +2888,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
return result;
 }
 
+static LLVMValueRef
+build_gep_for_deref(struct nir_to_llvm_context *ctx,
+   nir_deref_var *deref)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, 
deref->var);
+   assert(entry->data);
+   LLVMValueRef val = entry->data;
+   nir_deref *tail = deref->deref.child;
+   while (tail != NULL) {
+   LLVMValueRef offset;
+   switch (tail->deref_type) {
+   case nir_deref_type_array: {
+   nir_deref_array *array = nir_deref_as_array(tail);
+   offset = LLVMConstInt(ctx->i32, array->base_offset, 0);
+   if (array->deref_array_type ==
+   nir_deref_array_type_indirect) {
+   offset = LLVMBuildAdd(ctx->builder, offset,
+ get_src(ctx,
+ array->indirect),
+ "");
+   }
+   break;
+   }
+   case nir_deref_type_struct: {
+   nir_deref_struct *deref_struct =
+   nir_deref_as_struct(tail);
+   offset = LLVMConstInt(ctx->i32,
+ deref_struct->index, 0);
+   break;
+   }
+   default:
+   unreachable("bad deref type");
+   }
+   val = ac_build_gep0(&ctx->ac, val, offset);
+   tail = tail->child;
+   }
+   return val;
+}
+
 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
   nir_intrinsic_instr *instr)
 {
@@ -2966,6 +2988,14 @@ static LLVMValueRef visit_load_var(struct 
nir_to_llvm_context *ctx,
}
}
break;
+   case nir_var_shared: {
+   LLVMValueRef address = build_gep_for_deref(ctx,
+  instr->variables[0]);
+   LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, "");
+   return LLVMBuildBitCast(ctx->builder, val,
+