Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-27 Thread Tom Stellard
On Fri, Feb 24, 2017 at 03:30:50PM -0800, Matt Arsenault wrote:
> 
> > On Feb 24, 2017, at 14:39, Marek Olšák  wrote:
> > 
> > On Fri, Feb 24, 2017 at 7:20 PM, Matt Arsenault  wrote:
> >> 
> >> On Feb 24, 2017, at 01:45, Marek Olšák  wrote:
> >> 
> >> The main requirement is that if there is indirect indexing inside a
> >> loop, we always want to unroll the whole loop to get rid of the
> >> indexing, which can decrease scratch usage.
> >> 
> >> Marek
> >> 
> >> We boost the unroll thresholds when there is private memory indexed by the
> >> induction variable. See AMDGPUTTIImpl::getUnrollingPreferences
> > 
> > When Samuel Pitoiset was experimenting with the same code as this
> > patch but for radeonsi, getUnrollingPreferences wasn't even getting
> > called when unrolling. I guess he eventually gave up or didn't see any
> > positive effect from it.
> > 
> > Marek
> 
> Then there’s a bug somewhere. It should be getting called

It's possible TargetTransformInfo isn't being set up correctly by the
mesa pass pipeline.

-Tom

> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling. (v2)

2017-02-26 Thread Michael Schellenberger Costa
Hi Dave,

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag von 
Dave Airlie
Gesendet: Freitag, 24. Februar 2017 04:59
An: mesa-dev@lists.freedesktop.org
Betreff: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling. (v2)

From: Dave Airlie 

This enables LLVM loop unrolling.

v2: limit unroll count to 32, don't fully unroll. (arsenm)

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_llvm_helper.cpp | 22 ++
 src/amd/common/ac_llvm_util.h |  1 +
 src/amd/common/ac_nir_to_llvm.c   | 26 --
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_llvm_helper.cpp 
b/src/amd/common/ac_llvm_helper.cpp
index 594339e..85b0cbf 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -36,7 +36,9 @@
 #include 
 #include 
 #include 
+#include 
 
+using namespace llvm;

If you have to use the namespace, you should adapt the other lines below too (i.e. drop their now-redundant llvm:: prefixes).

 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)  {
llvm::Argument *A = llvm::unwrap(val);
@@ -53,3 +55,23 @@ bool ac_is_sgpr_param(LLVMValueRef arg)
return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) ||
   AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);  }
   
--Michael

+
+// MetadataAsValue uses a canonical format which strips the actual MDNode for
+// MDNode with just a single constant value, storing just a ConstantAsMetadata
+// This undoes this canonicalization, reconstructing the MDNode.
+static MDNode *extractMDNode(MetadataAsValue *MAV) {
+   Metadata *MD = MAV->getMetadata();
+   assert((isa<MDNode>(MD) || isa<ConstantAsMetadata>(MD)) &&
+  "Expected a metadata node or a canonicalized constant");
+
+   if (MDNode *N = dyn_cast<MDNode>(MD))
+   return N;
+   assert(0);
+   return MDNode::get(MAV->getContext(), MD); }
+
+void ac_metadata_point_op0_to_itself(LLVMValueRef v) {
+   MDNode *node = extractMDNode(unwrap<MetadataAsValue>(v));
+   node->replaceOperandWith(0, node);
+}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h 
index 1f37a12..0d6c53c 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -48,6 +48,7 @@ LLVMTargetMachineRef ac_create_target_machine(enum 
radeon_family family, bool su
 
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);  bool 
ac_is_sgpr_param(LLVMValueRef param);
+void ac_metadata_point_op0_to_itself(LLVMValueRef v);
 
 void
 ac_add_function_attr(LLVMValueRef function, diff --git 
a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 
9778581..d7a9a7b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3950,6 +3950,23 @@ static void visit_if(struct nir_to_llvm_context *ctx, 
nir_if *if_stmt)
LLVMPositionBuilderAtEnd(ctx->builder, merge_block);  }
 
+static void set_unroll_metadata(struct nir_to_llvm_context *ctx,
+  LLVMValueRef br)
+{
+   unsigned kind = LLVMGetMDKindIDInContext(ctx->context, "llvm.loop", 9);
+   LLVMValueRef md_unroll;
+   LLVMValueRef part_arg = LLVMMDStringInContext(ctx->context, 
"llvm.loop.unroll.count", 22);
+   LLVMValueRef count_arg = LLVMConstInt(ctx->i32, 32, false);
+   LLVMValueRef args[2] = {part_arg, count_arg};
+   LLVMValueRef count = LLVMMDNodeInContext(ctx->context, args, 2);
+
+   LLVMValueRef md_args[] = {NULL, count};
+   md_unroll = LLVMMDNodeInContext(ctx->context, md_args, 2);
+   ac_metadata_point_op0_to_itself(md_unroll);
+
+   LLVMSetMetadata(br, kind, md_unroll);
+}
+
 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)  {
LLVMBasicBlockRef continue_parent = ctx->continue_block; @@ -3964,8 
+3981,10 @@ static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop 
*loop)
LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
visit_cf_list(ctx, &loop->body);
 
-   if (LLVMGetInsertBlock(ctx->builder))
-   LLVMBuildBr(ctx->builder, ctx->continue_block);
+   if (LLVMGetInsertBlock(ctx->builder)) {
+   LLVMValueRef loop = LLVMBuildBr(ctx->builder, 
ctx->continue_block);
+   set_unroll_metadata(ctx, loop);
+   }
LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
 
ctx->continue_block = continue_parent; @@ -4827,10 +4846,13 @@ static 
void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
 
/* Add some optimization passes */
LLVMAddScalarReplAggregatesPass(passmgr);
+   LLVMAddLoopRotatePass(passmgr);
LLVMAddLICMPass(passmgr);
LLVMAddAggressiveDCEPass(passmgr);
LLVMAddCFGSimplificationPass(passmgr);
LLVMAddIns

Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-24 Thread Matt Arsenault

> On Feb 24, 2017, at 14:39, Marek Olšák  wrote:
> 
> On Fri, Feb 24, 2017 at 7:20 PM, Matt Arsenault  wrote:
>> 
>> On Feb 24, 2017, at 01:45, Marek Olšák  wrote:
>> 
>> The main requirement is that if there is indirect indexing inside a
>> loop, we always want to unroll the whole loop to get rid of the
>> indexing, which can decrease scratch usage.
>> 
>> Marek
>> 
>> We boost the unroll thresholds when there is private memory indexed by the
>> induction variable. See AMDGPUTTIImpl::getUnrollingPreferences
> 
> When Samuel Pitoiset was experimenting with the same code as this
> patch but for radeonsi, getUnrollingPreferences wasn't even getting
> called when unrolling. I guess he eventually gave up or didn't see any
> positive effect from it.
> 
> Marek

Then there’s a bug somewhere. It should be getting called.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-24 Thread Marek Olšák
On Fri, Feb 24, 2017 at 7:20 PM, Matt Arsenault  wrote:
>
> On Feb 24, 2017, at 01:45, Marek Olšák  wrote:
>
> The main requirement is that if there is indirect indexing inside a
> loop, we always want to unroll the whole loop to get rid of the
> indexing, which can decrease scratch usage.
>
> Marek
>
> We boost the unroll thresholds when there is private memory indexed by the
> induction variable. See AMDGPUTTIImpl::getUnrollingPreferences

When Samuel Pitoiset was experimenting with the same code as this
patch but for radeonsi, getUnrollingPreferences wasn't even getting
called when unrolling. I guess he eventually gave up or didn't see any
positive effect from it.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-24 Thread Matt Arsenault

> On Feb 24, 2017, at 01:45, Marek Olšák  wrote:
> 
> The main requirement is that if there is indirect indexing inside a
> loop, we always want to unroll the whole loop to get rid of the
> indexing, which can decrease scratch usage.
> 
> Marek
We boost the unroll thresholds when there is private memory indexed by the 
induction variable. See AMDGPUTTIImpl::getUnrollingPreferences.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-24 Thread Marek Olšák
On Fri, Feb 24, 2017 at 5:36 AM, Matt Arsenault  wrote:
>
> On Feb 23, 2017, at 19:44, Dave Airlie  wrote:
>
> On 24 February 2017 at 13:36, Matt Arsenault  wrote:
>
>
> On Feb 23, 2017, at 19:27, Dave Airlie  wrote:
>
> +static void set_unroll_metadata(struct nir_to_llvm_context *ctx,
> +LLVMValueRef br)
> +{
> + unsigned kind = LLVMGetMDKindIDInContext(ctx->context, "llvm.loop", 9);
> + LLVMValueRef md_unroll;
> + LLVMValueRef full_arg = LLVMMDStringInContext(ctx->context,
> "llvm.loop.unroll.full", 21);
> + LLVMValueRef full = LLVMMDNodeInContext(ctx->context, &full_arg, 1);
> +
> + LLVMValueRef md_args[] = {NULL, full};
> + md_unroll = LLVMMDNodeInContext(ctx->context, md_args, 2);
> + ac_metadata_point_op0_to_itself(md_unroll);
> +
> + LLVMSetMetadata(br, kind, md_unroll);
> +}
> +
>
>
> Why are you forcing full unrolling of all loops?
>
>
> Because I copied Marek's code with little idea of what llvm does.
>
> Should I just drop the full bits, perhaps set a llvm.loop.unroll.count = 32?
>
> Dave.
>
>
> The question is more why are you using the unroll metadata at all? It’s for
> implementing user hints like pragma unroll. By default the backend
> heuristics should be making these decisions. If this is helping benchmarks
> then that’s a datapoint that we need to play with those and increase the
> thresholds or something.

The main requirement is that if there is indirect indexing inside a
loop, we always want to unroll the whole loop to get rid of the
indexing, which can decrease scratch usage.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-23 Thread Matt Arsenault

> On Feb 23, 2017, at 19:44, Dave Airlie  wrote:
> 
> On 24 February 2017 at 13:36, Matt Arsenault wrote:
>> 
>> On Feb 23, 2017, at 19:27, Dave Airlie  wrote:
>> 
>> +static void set_unroll_metadata(struct nir_to_llvm_context *ctx,
>> +LLVMValueRef br)
>> +{
>> + unsigned kind = LLVMGetMDKindIDInContext(ctx->context, "llvm.loop", 9);
>> + LLVMValueRef md_unroll;
>> + LLVMValueRef full_arg = LLVMMDStringInContext(ctx->context,
>> "llvm.loop.unroll.full", 21);
>> + LLVMValueRef full = LLVMMDNodeInContext(ctx->context, &full_arg, 1);
>> +
>> + LLVMValueRef md_args[] = {NULL, full};
>> + md_unroll = LLVMMDNodeInContext(ctx->context, md_args, 2);
>> + ac_metadata_point_op0_to_itself(md_unroll);
>> +
>> + LLVMSetMetadata(br, kind, md_unroll);
>> +}
>> +
>> 
>> 
>> Why are you forcing full unrolling of all loops?
> 
> Because I copied Marek's code with little idea of what llvm does.
> 
> Should I just drop the full bits, perhaps set a llvm.loop.unroll.count = 32?
> 
> Dave.

The question is more why are you using the unroll metadata at all? It’s for 
implementing user hints like pragma unroll. By default the backend heuristics 
should be making these decisions. If this is helping benchmarks then that’s a 
datapoint that we need to play with those and increase the thresholds or 
something.

-Matt
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv/ac: enable loop unrolling. (v2)

2017-02-23 Thread Dave Airlie
From: Dave Airlie 

This enables LLVM loop unrolling.

v2: limit unroll count to 32, don't fully unroll. (arsenm)

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_llvm_helper.cpp | 22 ++
 src/amd/common/ac_llvm_util.h |  1 +
 src/amd/common/ac_nir_to_llvm.c   | 26 --
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_llvm_helper.cpp 
b/src/amd/common/ac_llvm_helper.cpp
index 594339e..85b0cbf 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -36,7 +36,9 @@
 #include 
 #include 
 #include 
+#include 
 
+using namespace llvm;
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
 {
llvm::Argument *A = llvm::unwrap(val);
@@ -53,3 +55,23 @@ bool ac_is_sgpr_param(LLVMValueRef arg)
return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) ||
   AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
 }
+
+// MetadataAsValue uses a canonical format which strips the actual MDNode for
+// MDNode with just a single constant value, storing just a ConstantAsMetadata
+// This undoes this canonicalization, reconstructing the MDNode.
+static MDNode *extractMDNode(MetadataAsValue *MAV) {
+   Metadata *MD = MAV->getMetadata();
+   assert((isa<MDNode>(MD) || isa<ConstantAsMetadata>(MD)) &&
+  "Expected a metadata node or a canonicalized constant");
+
+   if (MDNode *N = dyn_cast<MDNode>(MD))
+   return N;
+   assert(0);
+   return MDNode::get(MAV->getContext(), MD);
+}
+
+void ac_metadata_point_op0_to_itself(LLVMValueRef v)
+{
+   MDNode *node = extractMDNode(unwrap<MetadataAsValue>(v));
+   node->replaceOperandWith(0, node);
+}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 1f37a12..0d6c53c 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -48,6 +48,7 @@ LLVMTargetMachineRef ac_create_target_machine(enum 
radeon_family family, bool su
 
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
 bool ac_is_sgpr_param(LLVMValueRef param);
+void ac_metadata_point_op0_to_itself(LLVMValueRef v);
 
 void
 ac_add_function_attr(LLVMValueRef function,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9778581..d7a9a7b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3950,6 +3950,23 @@ static void visit_if(struct nir_to_llvm_context *ctx, 
nir_if *if_stmt)
LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
 }
 
+static void set_unroll_metadata(struct nir_to_llvm_context *ctx,
+  LLVMValueRef br)
+{
+   unsigned kind = LLVMGetMDKindIDInContext(ctx->context, "llvm.loop", 9);
+   LLVMValueRef md_unroll;
+   LLVMValueRef part_arg = LLVMMDStringInContext(ctx->context, 
"llvm.loop.unroll.count", 22);
+   LLVMValueRef count_arg = LLVMConstInt(ctx->i32, 32, false);
+   LLVMValueRef args[2] = {part_arg, count_arg};
+   LLVMValueRef count = LLVMMDNodeInContext(ctx->context, args, 2);
+
+   LLVMValueRef md_args[] = {NULL, count};
+   md_unroll = LLVMMDNodeInContext(ctx->context, md_args, 2);
+   ac_metadata_point_op0_to_itself(md_unroll);
+
+   LLVMSetMetadata(br, kind, md_unroll);
+}
+
 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
 {
LLVMBasicBlockRef continue_parent = ctx->continue_block;
@@ -3964,8 +3981,10 @@ static void visit_loop(struct nir_to_llvm_context *ctx, 
nir_loop *loop)
LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
visit_cf_list(ctx, &loop->body);
 
-   if (LLVMGetInsertBlock(ctx->builder))
-   LLVMBuildBr(ctx->builder, ctx->continue_block);
+   if (LLVMGetInsertBlock(ctx->builder)) {
+   LLVMValueRef loop = LLVMBuildBr(ctx->builder, 
ctx->continue_block);
+   set_unroll_metadata(ctx, loop);
+   }
LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
 
ctx->continue_block = continue_parent;
@@ -4827,10 +4846,13 @@ static void ac_llvm_finalize_module(struct 
nir_to_llvm_context * ctx)
 
/* Add some optimization passes */
LLVMAddScalarReplAggregatesPass(passmgr);
+   LLVMAddLoopRotatePass(passmgr);
LLVMAddLICMPass(passmgr);
LLVMAddAggressiveDCEPass(passmgr);
LLVMAddCFGSimplificationPass(passmgr);
LLVMAddInstructionCombiningPass(passmgr);
+   LLVMAddIndVarSimplifyPass(passmgr);
+   LLVMAddLoopUnrollPass(passmgr);
 
/* Run the pass */
LLVMInitializeFunctionPassManager(passmgr);
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-23 Thread Dave Airlie
On 24 February 2017 at 13:36, Matt Arsenault  wrote:
>
> On Feb 23, 2017, at 19:27, Dave Airlie  wrote:
>
> +static void set_unroll_metadata(struct nir_to_llvm_context *ctx,
> +LLVMValueRef br)
> +{
> + unsigned kind = LLVMGetMDKindIDInContext(ctx->context, "llvm.loop", 9);
> + LLVMValueRef md_unroll;
> + LLVMValueRef full_arg = LLVMMDStringInContext(ctx->context,
> "llvm.loop.unroll.full", 21);
> + LLVMValueRef full = LLVMMDNodeInContext(ctx->context, &full_arg, 1);
> +
> + LLVMValueRef md_args[] = {NULL, full};
> + md_unroll = LLVMMDNodeInContext(ctx->context, md_args, 2);
> + ac_metadata_point_op0_to_itself(md_unroll);
> +
> + LLVMSetMetadata(br, kind, md_unroll);
> +}
> +
>
>
> Why are you forcing full unrolling of all loops?

Because I copied Marek's code with little idea of what llvm does.

Should I just drop the full bits, perhaps set a llvm.loop.unroll.count = 32?

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-23 Thread Matt Arsenault

> On Feb 23, 2017, at 19:27, Dave Airlie  wrote:
> 
> +static void set_unroll_metadata(struct nir_to_llvm_context *ctx,
> +LLVMValueRef br)
> +{
> + unsigned kind = LLVMGetMDKindIDInContext(ctx->context, "llvm.loop", 9);
> + LLVMValueRef md_unroll;
> + LLVMValueRef full_arg = LLVMMDStringInContext(ctx->context, 
> "llvm.loop.unroll.full", 21);
> + LLVMValueRef full = LLVMMDNodeInContext(ctx->context, &full_arg, 1);
> +
> + LLVMValueRef md_args[] = {NULL, full};
> + md_unroll = LLVMMDNodeInContext(ctx->context, md_args, 2);
> + ac_metadata_point_op0_to_itself(md_unroll);
> +
> + LLVMSetMetadata(br, kind, md_unroll);
> +}
> +

Why are you forcing full unrolling of all loops?

-Matt
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-23 Thread Dave Airlie
On 24 February 2017 at 13:27, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This enables LLVM loop unrolling.

Meant to also say it's based on a radeonsi patch Marek wrote.

Dave.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_llvm_helper.cpp | 22 ++
>  src/amd/common/ac_llvm_util.h |  1 +
>  src/amd/common/ac_nir_to_llvm.c   | 24 ++--
>  3 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_helper.cpp 
> b/src/amd/common/ac_llvm_helper.cpp
> index 594339e..85b0cbf 100644
> --- a/src/amd/common/ac_llvm_helper.cpp
> +++ b/src/amd/common/ac_llvm_helper.cpp
> @@ -36,7 +36,9 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
> +using namespace llvm;
>  void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
>  {
> llvm::Argument *A = llvm::unwrap(val);
> @@ -53,3 +55,23 @@ bool ac_is_sgpr_param(LLVMValueRef arg)
> return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) ||
>AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
>  }
> +
> +// MetadataAsValue uses a canonical format which strips the actual MDNode for
> +// MDNode with just a single constant value, storing just a 
> ConstantAsMetadata
> +// This undoes this canonicalization, reconstructing the MDNode.
> +static MDNode *extractMDNode(MetadataAsValue *MAV) {
> +   Metadata *MD = MAV->getMetadata();
> +   assert((isa<MDNode>(MD) || isa<ConstantAsMetadata>(MD)) &&
> +  "Expected a metadata node or a canonicalized constant");
> +
> +   if (MDNode *N = dyn_cast<MDNode>(MD))
> +   return N;
> +   assert(0);
> +   return MDNode::get(MAV->getContext(), MD);
> +}
> +
> +void ac_metadata_point_op0_to_itself(LLVMValueRef v)
> +{
> +   MDNode *node = extractMDNode(unwrap<MetadataAsValue>(v));
> +   node->replaceOperandWith(0, node);
> +}
> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
> index 1f37a12..0d6c53c 100644
> --- a/src/amd/common/ac_llvm_util.h
> +++ b/src/amd/common/ac_llvm_util.h
> @@ -48,6 +48,7 @@ LLVMTargetMachineRef ac_create_target_machine(enum 
> radeon_family family, bool su
>
>  void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
>  bool ac_is_sgpr_param(LLVMValueRef param);
> +void ac_metadata_point_op0_to_itself(LLVMValueRef v);
>
>  void
>  ac_add_function_attr(LLVMValueRef function,
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 9778581..0e20aa1 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3950,6 +3950,21 @@ static void visit_if(struct nir_to_llvm_context *ctx, 
> nir_if *if_stmt)
> LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
>  }
>
> +static void set_unroll_metadata(struct nir_to_llvm_context *ctx,
> +  LLVMValueRef br)
> +{
> +   unsigned kind = LLVMGetMDKindIDInContext(ctx->context, "llvm.loop", 
> 9);
> +   LLVMValueRef md_unroll;
> +   LLVMValueRef full_arg = LLVMMDStringInContext(ctx->context, 
> "llvm.loop.unroll.full", 21);
> +   LLVMValueRef full = LLVMMDNodeInContext(ctx->context, &full_arg, 1);
> +
> +   LLVMValueRef md_args[] = {NULL, full};
> +   md_unroll = LLVMMDNodeInContext(ctx->context, md_args, 2);
> +   ac_metadata_point_op0_to_itself(md_unroll);
> +
> +   LLVMSetMetadata(br, kind, md_unroll);
> +}
> +
>  static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
>  {
> LLVMBasicBlockRef continue_parent = ctx->continue_block;
> @@ -3964,8 +3979,10 @@ static void visit_loop(struct nir_to_llvm_context 
> *ctx, nir_loop *loop)
> LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
> visit_cf_list(ctx, &loop->body);
>
> -   if (LLVMGetInsertBlock(ctx->builder))
> -   LLVMBuildBr(ctx->builder, ctx->continue_block);
> +   if (LLVMGetInsertBlock(ctx->builder)) {
> +   LLVMValueRef loop = LLVMBuildBr(ctx->builder, 
> ctx->continue_block);
> +   set_unroll_metadata(ctx, loop);
> +   }
> LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
>
> ctx->continue_block = continue_parent;
> @@ -4827,10 +4844,13 @@ static void ac_llvm_finalize_module(struct 
> nir_to_llvm_context * ctx)
>
> /* Add some optimization passes */
> LLVMAddScalarReplAggregatesPass(passmgr);
> +   LLVMAddLoopRotatePass(passmgr);
> LLVMAddLICMPass(passmgr);
> LLVMAddAggressiveDCEPass(passmgr);
> LLVMAddCFGSimplificationPass(passmgr);
> LLVMAddInstructionCombiningPass(passmgr);
> +   LLVMAddIndVarSimplifyPass(passmgr);
> +   LLVMAddLoopUnrollPass(passmgr);
>
> /* Run the pass */
> LLVMInitializeFunctionPassManager(passmgr);
> --
> 2.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
__

[Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-23 Thread Dave Airlie
From: Dave Airlie 

This enables LLVM loop unrolling.

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_llvm_helper.cpp | 22 ++
 src/amd/common/ac_llvm_util.h |  1 +
 src/amd/common/ac_nir_to_llvm.c   | 24 ++--
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_llvm_helper.cpp 
b/src/amd/common/ac_llvm_helper.cpp
index 594339e..85b0cbf 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -36,7 +36,9 @@
 #include 
 #include 
 #include 
+#include 
 
+using namespace llvm;
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
 {
llvm::Argument *A = llvm::unwrap(val);
@@ -53,3 +55,23 @@ bool ac_is_sgpr_param(LLVMValueRef arg)
return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) ||
   AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
 }
+
+// MetadataAsValue uses a canonical format which strips the actual MDNode for
+// MDNode with just a single constant value, storing just a ConstantAsMetadata
+// This undoes this canonicalization, reconstructing the MDNode.
+static MDNode *extractMDNode(MetadataAsValue *MAV) {
+   Metadata *MD = MAV->getMetadata();
+   assert((isa<MDNode>(MD) || isa<ConstantAsMetadata>(MD)) &&
+  "Expected a metadata node or a canonicalized constant");
+
+   if (MDNode *N = dyn_cast<MDNode>(MD))
+   return N;
+   assert(0);
+   return MDNode::get(MAV->getContext(), MD);
+}
+
+void ac_metadata_point_op0_to_itself(LLVMValueRef v)
+{
+   MDNode *node = extractMDNode(unwrap<MetadataAsValue>(v));
+   node->replaceOperandWith(0, node);
+}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 1f37a12..0d6c53c 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -48,6 +48,7 @@ LLVMTargetMachineRef ac_create_target_machine(enum 
radeon_family family, bool su
 
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
 bool ac_is_sgpr_param(LLVMValueRef param);
+void ac_metadata_point_op0_to_itself(LLVMValueRef v);
 
 void
 ac_add_function_attr(LLVMValueRef function,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9778581..0e20aa1 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3950,6 +3950,21 @@ static void visit_if(struct nir_to_llvm_context *ctx, 
nir_if *if_stmt)
LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
 }
 
+static void set_unroll_metadata(struct nir_to_llvm_context *ctx,
+  LLVMValueRef br)
+{
+   unsigned kind = LLVMGetMDKindIDInContext(ctx->context, "llvm.loop", 9);
+   LLVMValueRef md_unroll;
+   LLVMValueRef full_arg = LLVMMDStringInContext(ctx->context, 
"llvm.loop.unroll.full", 21);
+   LLVMValueRef full = LLVMMDNodeInContext(ctx->context, &full_arg, 1);
+
+   LLVMValueRef md_args[] = {NULL, full};
+   md_unroll = LLVMMDNodeInContext(ctx->context, md_args, 2);
+   ac_metadata_point_op0_to_itself(md_unroll);
+
+   LLVMSetMetadata(br, kind, md_unroll);
+}
+
 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
 {
LLVMBasicBlockRef continue_parent = ctx->continue_block;
@@ -3964,8 +3979,10 @@ static void visit_loop(struct nir_to_llvm_context *ctx, 
nir_loop *loop)
LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
visit_cf_list(ctx, &loop->body);
 
-   if (LLVMGetInsertBlock(ctx->builder))
-   LLVMBuildBr(ctx->builder, ctx->continue_block);
+   if (LLVMGetInsertBlock(ctx->builder)) {
+   LLVMValueRef loop = LLVMBuildBr(ctx->builder, 
ctx->continue_block);
+   set_unroll_metadata(ctx, loop);
+   }
LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
 
ctx->continue_block = continue_parent;
@@ -4827,10 +4844,13 @@ static void ac_llvm_finalize_module(struct 
nir_to_llvm_context * ctx)
 
/* Add some optimization passes */
LLVMAddScalarReplAggregatesPass(passmgr);
+   LLVMAddLoopRotatePass(passmgr);
LLVMAddLICMPass(passmgr);
LLVMAddAggressiveDCEPass(passmgr);
LLVMAddCFGSimplificationPass(passmgr);
LLVMAddInstructionCombiningPass(passmgr);
+   LLVMAddIndVarSimplifyPass(passmgr);
+   LLVMAddLoopUnrollPass(passmgr);
 
/* Run the pass */
LLVMInitializeFunctionPassManager(passmgr);
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev