[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-10-04 Thread Roman Lebedev via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG03bd5198b6f7: [OldPM] Pass manager: run SROA after (simple) 
loop unrolling (authored by lebedev.ri).

Changed prior to commit:
  https://reviews.llvm.org/D87972?vs=296005&id=296028#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

Files:
  clang/test/CodeGenCXX/union-tbaa2.cpp
  clang/test/Misc/loop-opt-setup.c
  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/test/Other/opt-O2-pipeline.ll
  llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
  llvm/test/Other/opt-O3-pipeline.ll
  llvm/test/Other/opt-Os-pipeline.ll
  llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll

Index: llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
===
--- llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
@@ -22,55 +22,21 @@
 %"struct.std::array" = type { [6 x i32] }
 
 define dso_local void @_Z3fooi(i32 %cnt) {
-; OLDPM-LABEL: @_Z3fooi(
-; OLDPM-NEXT:  entry:
-; OLDPM-NEXT:[[ARR:%.*]] = alloca %"struct.std::array", align 16
-; OLDPM-NEXT:[[TMP0:%.*]] = bitcast %"struct.std::array"* [[ARR]] to i8*
-; OLDPM-NEXT:call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull [[TMP0]])
-; OLDPM-NEXT:[[ARRAYDECAY_I_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 0
-; OLDPM-NEXT:[[INCDEC_PTR:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 1
-; OLDPM-NEXT:[[INCDEC_PTR_1:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 2
-; OLDPM-NEXT:[[INCDEC_PTR_2:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 3
-; OLDPM-NEXT:[[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[CNT:%.*]], i32 0
-; OLDPM-NEXT:[[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
-; OLDPM-NEXT:[[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], 
-; OLDPM-NEXT:[[TMP4:%.*]] = bitcast %"struct.std::array"* [[ARR]] to <4 x i32>*
-; OLDPM-NEXT:store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
-; OLDPM-NEXT:[[INCDEC_PTR_3:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 4
-; OLDPM-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
-; OLDPM-NEXT:store i32 [[INC_4]], i32* [[INCDEC_PTR_3]], align 16
-; OLDPM-NEXT:[[INCDEC_PTR_4:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 5
-; OLDPM-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
-; OLDPM-NEXT:store i32 [[INC_5]], i32* [[INCDEC_PTR_4]], align 4
-; OLDPM-NEXT:[[TMP5:%.*]] = load i32, i32* [[ARRAYDECAY_I_I_I]], align 16
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP5]])
-; OLDPM-NEXT:[[TMP6:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP6]])
-; OLDPM-NEXT:[[TMP7:%.*]] = load i32, i32* [[INCDEC_PTR_1]], align 8
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP7]])
-; OLDPM-NEXT:[[TMP8:%.*]] = load i32, i32* [[INCDEC_PTR_2]], align 4
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP8]])
-; OLDPM-NEXT:[[TMP9:%.*]] = load i32, i32* [[INCDEC_PTR_3]], align 16
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP9]])
-; OLDPM-NEXT:call void @_Z3usei(i32 [[INC_5]])
-; OLDPM-NEXT:call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[TMP0]])
-; OLDPM-NEXT:ret void
-;
-; NEWPM-LABEL: @_Z3fooi(
-; NEWPM-NEXT:  entry:
-; NEWPM-NEXT:[[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1
-; NEWPM-NEXT:[[INC_1:%.*]] = add nsw i32 [[CNT]], 2
-; NEWPM-NEXT:[[INC_2:%.*]] = add nsw i32 [[CNT]], 3
-; NEWPM-NEXT:[[INC_3:%.*]] = add nsw i32 [[CNT]], 4
-; NEWPM-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
-; NEWPM-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_1]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_2]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_3]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_4]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_5]])
-; NEWPM-NEXT:ret void
+; CHECK-LABEL: @_Z3fooi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1
+; CHECK-NEXT:[[INC_1:%.*]] = add nsw i32 [[CNT]], 2
+; CHECK-NEXT:[[INC_2:%.*]] = add nsw i32 [[CNT]], 3
+; CHECK-NEXT:[[INC_3:%.*]] = add nsw i32 [[CNT]], 4
+; CHECK-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
+; CHECK-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC]])
+; CHECK-NEXT:call void 

[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-10-04 Thread Roman Lebedev via Phabricator via cfe-commits
lebedev.ri added a comment.

In D87972#2310603 , @xbolva00 wrote:

>> ! In D87972#2310595 , @nikic wrote:
>>
>>> I'll just say this LGTM as it establishes parity with what NewPM has been 
>>> doing for a while already.
>
> +1

Thank you. 
I'm gonna just land this as is then.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-10-04 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 accepted this revision.
xbolva00 added a comment.

>> I'll just say this LGTM as it establishes parity with what NewPM has been 
>> doing for a while already.

+1


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-10-04 Thread Nikita Popov via Phabricator via cfe-commits
nikic accepted this revision.
nikic added a comment.
This revision is now accepted and ready to land.

I'll just say this LGTM as it establishes parity with what NewPM has been doing 
for a while already.

Reviewers, in the future, please reject any patches that only change the NewPM 
pipeline or only change the LegacyPM pipeline, unless there is some good 
technical reason to do so. If there was one here, it was not mentioned in the 
original patch.




Comment at: clang/test/CodeGenCXX/union-tbaa2.cpp:1
-// RUN: %clang_cc1 %s -O2 -fno-experimental-new-pass-manager -std=c++11 
-triple x86_64-unknown-linux-gnu -target-cpu x86-64 -target-feature +sse4.2 
-target-feature +avx -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O1 -fno-experimental-new-pass-manager -std=c++11 
-triple x86_64-unknown-linux-gnu -target-cpu x86-64 -target-feature +sse4.2 
-target-feature +avx -emit-llvm -o - | FileCheck %s
 

Remove `-fno-experimental-new-pass-manager `? It was added to work around the 
NewPM/LegacyPM discrepancy.



Comment at: clang/test/Misc/loop-opt-setup.c:2
+// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - 
%s -emit-llvm | FileCheck %s -check-prefixes=CHECK-ALL,CHECK-NEWPM
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o 
- %s -emit-llvm | FileCheck %s -check-prefixes=CHECK-ALL,CHECK-NEWPM
 extern int a[16];

xbolva00 wrote:
> OLDPM?
Remove the NewPM/OldPM tests now that behavior is the same?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-10-03 Thread Roman Lebedev via Phabricator via cfe-commits
lebedev.ri updated this revision to Diff 296005.
lebedev.ri added a comment.

Re-fix clang tests


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

Files:
  clang/test/CodeGenCXX/union-tbaa2.cpp
  clang/test/Misc/loop-opt-setup.c
  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/test/Other/opt-O2-pipeline.ll
  llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
  llvm/test/Other/opt-O3-pipeline.ll
  llvm/test/Other/opt-Os-pipeline.ll
  llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll

Index: llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
===
--- llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
@@ -22,55 +22,21 @@
 %"struct.std::array" = type { [6 x i32] }
 
 define dso_local void @_Z3fooi(i32 %cnt) {
-; OLDPM-LABEL: @_Z3fooi(
-; OLDPM-NEXT:  entry:
-; OLDPM-NEXT:[[ARR:%.*]] = alloca %"struct.std::array", align 16
-; OLDPM-NEXT:[[TMP0:%.*]] = bitcast %"struct.std::array"* [[ARR]] to i8*
-; OLDPM-NEXT:call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull [[TMP0]])
-; OLDPM-NEXT:[[ARRAYDECAY_I_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 0
-; OLDPM-NEXT:[[INCDEC_PTR:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 1
-; OLDPM-NEXT:[[INCDEC_PTR_1:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 2
-; OLDPM-NEXT:[[INCDEC_PTR_2:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 3
-; OLDPM-NEXT:[[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[CNT:%.*]], i32 0
-; OLDPM-NEXT:[[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
-; OLDPM-NEXT:[[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], 
-; OLDPM-NEXT:[[TMP4:%.*]] = bitcast %"struct.std::array"* [[ARR]] to <4 x i32>*
-; OLDPM-NEXT:store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
-; OLDPM-NEXT:[[INCDEC_PTR_3:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 4
-; OLDPM-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
-; OLDPM-NEXT:store i32 [[INC_4]], i32* [[INCDEC_PTR_3]], align 16
-; OLDPM-NEXT:[[INCDEC_PTR_4:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 5
-; OLDPM-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
-; OLDPM-NEXT:store i32 [[INC_5]], i32* [[INCDEC_PTR_4]], align 4
-; OLDPM-NEXT:[[TMP5:%.*]] = load i32, i32* [[ARRAYDECAY_I_I_I]], align 16
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP5]])
-; OLDPM-NEXT:[[TMP6:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP6]])
-; OLDPM-NEXT:[[TMP7:%.*]] = load i32, i32* [[INCDEC_PTR_1]], align 8
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP7]])
-; OLDPM-NEXT:[[TMP8:%.*]] = load i32, i32* [[INCDEC_PTR_2]], align 4
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP8]])
-; OLDPM-NEXT:[[TMP9:%.*]] = load i32, i32* [[INCDEC_PTR_3]], align 16
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP9]])
-; OLDPM-NEXT:call void @_Z3usei(i32 [[INC_5]])
-; OLDPM-NEXT:call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[TMP0]])
-; OLDPM-NEXT:ret void
-;
-; NEWPM-LABEL: @_Z3fooi(
-; NEWPM-NEXT:  entry:
-; NEWPM-NEXT:[[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1
-; NEWPM-NEXT:[[INC_1:%.*]] = add nsw i32 [[CNT]], 2
-; NEWPM-NEXT:[[INC_2:%.*]] = add nsw i32 [[CNT]], 3
-; NEWPM-NEXT:[[INC_3:%.*]] = add nsw i32 [[CNT]], 4
-; NEWPM-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
-; NEWPM-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_1]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_2]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_3]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_4]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_5]])
-; NEWPM-NEXT:ret void
+; CHECK-LABEL: @_Z3fooi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1
+; CHECK-NEXT:[[INC_1:%.*]] = add nsw i32 [[CNT]], 2
+; CHECK-NEXT:[[INC_2:%.*]] = add nsw i32 [[CNT]], 3
+; CHECK-NEXT:[[INC_3:%.*]] = add nsw i32 [[CNT]], 4
+; CHECK-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
+; CHECK-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_1]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_2]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_3]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_4]])

[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-10-02 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 added a comment.

>> I'm not really sure what are my potential next steps here.

Maybe just add option to disable late SROA?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-10-02 Thread Roman Lebedev via Phabricator via cfe-commits
lebedev.ri added a comment.

In D87972#2294614 , @lebedev.ri wrote:

> In D87972#2294488 , @xbolva00 wrote:
>
 Does that sound reasonable?
>>
>> Yes IMHO.
>>
 What are the next suggested steps?
>>
>> It would be great to isolate and check the cases which regressed a bit.
>
> I've rerun my benchmark, and while the results are still the same (runtime 
> geomean -0.53%/-0.40%,
> but that obviously depends on the benchmarks), there are some obvious 
> outliers:
> F13059172: image.png 
> F13059175: rsbench.txt 
> I'll try to take a look at that, assuming it's not noise.

Hmm. So i did just take a look, manually re-benchmarking each of these, and 
while i still see a few small improvements,
the regressions there are all appear to be basically noise. Not what i was 
hoping for :/

In D87972#2284060 , @MaskRay wrote:

> I have tested this patch internally and seen gains and losses. On one 
> document search related benchmark 3~5% improvement. One zippy (snappy) there 
> is 3~5% regression. Perhaps we do need a conditional extra SROA run.

Does it look like one of the scary "branch predictor got confused"/"code layout 
changed causing different alignment"?

I'm not really sure what are my potential next steps here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-10-02 Thread Roman Lebedev via Phabricator via cfe-commits
lebedev.ri updated this revision to Diff 295817.
lebedev.ri added a comment.
Herald added a subscriber: pengfei.

Rebased, NFC


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

Files:
  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/test/Other/opt-O2-pipeline.ll
  llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
  llvm/test/Other/opt-O3-pipeline.ll
  llvm/test/Other/opt-Os-pipeline.ll
  llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll

Index: llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
===
--- llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
@@ -22,55 +22,21 @@
 %"struct.std::array" = type { [6 x i32] }
 
 define dso_local void @_Z3fooi(i32 %cnt) {
-; OLDPM-LABEL: @_Z3fooi(
-; OLDPM-NEXT:  entry:
-; OLDPM-NEXT:[[ARR:%.*]] = alloca %"struct.std::array", align 16
-; OLDPM-NEXT:[[TMP0:%.*]] = bitcast %"struct.std::array"* [[ARR]] to i8*
-; OLDPM-NEXT:call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull [[TMP0]])
-; OLDPM-NEXT:[[ARRAYDECAY_I_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 0
-; OLDPM-NEXT:[[INCDEC_PTR:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 1
-; OLDPM-NEXT:[[INCDEC_PTR_1:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 2
-; OLDPM-NEXT:[[INCDEC_PTR_2:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 3
-; OLDPM-NEXT:[[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[CNT:%.*]], i32 0
-; OLDPM-NEXT:[[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
-; OLDPM-NEXT:[[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], 
-; OLDPM-NEXT:[[TMP4:%.*]] = bitcast %"struct.std::array"* [[ARR]] to <4 x i32>*
-; OLDPM-NEXT:store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
-; OLDPM-NEXT:[[INCDEC_PTR_3:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 4
-; OLDPM-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
-; OLDPM-NEXT:store i32 [[INC_4]], i32* [[INCDEC_PTR_3]], align 16
-; OLDPM-NEXT:[[INCDEC_PTR_4:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 5
-; OLDPM-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
-; OLDPM-NEXT:store i32 [[INC_5]], i32* [[INCDEC_PTR_4]], align 4
-; OLDPM-NEXT:[[TMP5:%.*]] = load i32, i32* [[ARRAYDECAY_I_I_I]], align 16
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP5]])
-; OLDPM-NEXT:[[TMP6:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP6]])
-; OLDPM-NEXT:[[TMP7:%.*]] = load i32, i32* [[INCDEC_PTR_1]], align 8
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP7]])
-; OLDPM-NEXT:[[TMP8:%.*]] = load i32, i32* [[INCDEC_PTR_2]], align 4
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP8]])
-; OLDPM-NEXT:[[TMP9:%.*]] = load i32, i32* [[INCDEC_PTR_3]], align 16
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP9]])
-; OLDPM-NEXT:call void @_Z3usei(i32 [[INC_5]])
-; OLDPM-NEXT:call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[TMP0]])
-; OLDPM-NEXT:ret void
-;
-; NEWPM-LABEL: @_Z3fooi(
-; NEWPM-NEXT:  entry:
-; NEWPM-NEXT:[[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1
-; NEWPM-NEXT:[[INC_1:%.*]] = add nsw i32 [[CNT]], 2
-; NEWPM-NEXT:[[INC_2:%.*]] = add nsw i32 [[CNT]], 3
-; NEWPM-NEXT:[[INC_3:%.*]] = add nsw i32 [[CNT]], 4
-; NEWPM-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
-; NEWPM-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_1]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_2]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_3]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_4]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_5]])
-; NEWPM-NEXT:ret void
+; CHECK-LABEL: @_Z3fooi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1
+; CHECK-NEXT:[[INC_1:%.*]] = add nsw i32 [[CNT]], 2
+; CHECK-NEXT:[[INC_2:%.*]] = add nsw i32 [[CNT]], 3
+; CHECK-NEXT:[[INC_3:%.*]] = add nsw i32 [[CNT]], 4
+; CHECK-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
+; CHECK-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_1]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_2]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_3]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_4]])
+; CHECK-NEXT:call void @_Z3usei(i32 

[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-25 Thread Roman Lebedev via Phabricator via cfe-commits
lebedev.ri added a comment.

In D87972#2294488 , @xbolva00 wrote:

>>> Does that sound reasonable?
>
> Yes IMHO.
>
>>> What are the next suggested steps?
>
> It would be great to isolate and check the cases which regressed a bit.

I've rerun my benchmark, and while the results are still the same (runtime 
geomean -0.53%/-0.40%,
but that obviously depends on the benchmarks), there are some obvious outliers:
F13059172: image.png 
F13059175: rsbench.txt 
I'll try to take a look at that, assuming it's not noise.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-25 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 added a comment.

>> Does that sound reasonable?

Yes IMHO.

>> What are the next suggested steps?

It would be great to isolate and check the cases which regressed a bit.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-25 Thread Roman Lebedev via Phabricator via cfe-commits
lebedev.ri added a comment.

@MaskRay, @dmgreen & @sanwou01 thank you for running perf experiment!

I think all the results are consistent along the lines of "this sounds
generally reasonable (esp. given that new-pm does it already),
as usual results in ups, but seems to be a (small) geomean win overall".

Does that sound reasonable?
What are the next suggested steps?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-22 Thread Sanne Wouda via Phabricator via cfe-commits
sanwou01 added a comment.

SPEC 2017 on AArch64 is neutral on the geomean. The only slight worry is 
omnetpp with a 1% regression, but this is balanced by a .8% improvement on mcf. 
Other changes are in the noise.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-21 Thread Dave Green via Phabricator via cfe-commits
dmgreen added a comment.

> @dmgreen for arm?:)

This would seem more like a good general codegen cleanup than something that 
would be target dependent. It would probably be more dependent on the code that 
is being run, than the exact target. But yeah, I ran some baremetal tests. Only 
one changed (including in all the codesize tests), which was a nasty 
complicated state machine. It changed between -5% and +3.5%, depend on the cpu 
it ran on. (Unfortunately it went down on the cores I was more interested in).

I have run into this exact problem recently and very nearly put up a very 
similar patch for it. In that case it was making intrinsic MVE code much easier 
to write, as you could rely on loops not clogging up stack array uses after 
they were unrolled. The differences were not quite in the 160% range, but they 
would be nice improvements.

So, a little reluctantly, this does sound like a good idea to me. I asked Sanne 
to run Spec too if he has the time.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-21 Thread Florian Hahn via Phabricator via cfe-commits
fhahn added a comment.

In D87972#2284060 , @MaskRay wrote:

> I have tested this patch internally and seen gains and losses. On one 
> document search related benchmark 3~5% improvement. One zippy (snappy) there 
> is 3~5% regression. Perhaps we do need a conditional extra SROA run.

That's a bit of a surprise to me! It would be great to know how/why running 
SROA later makes things worse in some cases.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-21 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 added a comment.

X86 data collected by @lebedev.ri looks good as well.

@dmgreen for arm?:)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-21 Thread Stanislav Mekhanoshin via Phabricator via cfe-commits
rampitec added a comment.

This is obviously LGTM from the AMDGPU BE point of view, we did it ourselves.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-21 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D87972#2284096 , @lebedev.ri wrote:

> In D87972#2284064 , @xbolva00 wrote:
>
>> In D87972#2284060 , @MaskRay wrote:
>>
>>> I have tested this patch internally and seen gains and losses. On one 
>>> document search related benchmark 3~5% improvement. One zippy (snappy) 
>>> there is 3~5% regression. Perhaps we do need a conditional extra SROA run.
>>
>> Snappy  - you mean public https://github.com/google/snappy?
>>
>> Well, it should be possible to analyze it...
>
>
>
>> @lebedev.ri any perf data from testsuite/rawspeed?
>
> I did look. F13016699: sroa-after-unroll.rsbench.txt 
> 
> This suggests that geomean is `-0.8%` runtime improvement,
> with ups F13016722: image.png 
>
> But as i have said in the patch's description, i stumbled into this when 
> writing new code, where the effect is //much// larger.

We probably need to collect more performance data of more benchmarks on more 
platforms (different targets) to understand the impact. I hesitate to add 
https://reviews.llvm.org/rG1f4e7463b5e3ff654c84371527767830e51db10d as a 
generic one as some targets may have regressions due to potentially very 
different memory access patterns.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-21 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 added inline comments.



Comment at: clang/test/Misc/loop-opt-setup.c:2
+// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - 
%s -emit-llvm | FileCheck %s -check-prefixes=CHECK-ALL,CHECK-NEWPM
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o 
- %s -emit-llvm | FileCheck %s -check-prefixes=CHECK-ALL,CHECK-NEWPM
 extern int a[16];

OLDPM?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-21 Thread Roman Lebedev via Phabricator via cfe-commits
lebedev.ri added a comment.

In D87972#2285176 , @arsenm wrote:

> I assume this makes 1f4e7463b5e3ff654c84371527767830e51db10d 
>  
> redundant?

Yes, see `llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp` change.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-21 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added a comment.

I assume this makes 1f4e7463b5e3ff654c84371527767830e51db10d 
 redundant?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-20 Thread Roman Lebedev via Phabricator via cfe-commits
lebedev.ri added a comment.

In D87972#2284064 , @xbolva00 wrote:

> In D87972#2284060 , @MaskRay wrote:
>
>> I have tested this patch internally and seen gains and losses. On one 
>> document search related benchmark 3~5% improvement. One zippy (snappy) there 
>> is 3~5% regression. Perhaps we do need a conditional extra SROA run.
>
> Snappy  - you mean public https://github.com/google/snappy?
>
> Well, it should be possible to analyze it...



> @lebedev.ri any perf data from testsuite/rawspeed?

I did look. F13016699: sroa-after-unroll.rsbench.txt 

This suggests that geomean is `-0.8%` runtime improvement,
with ups F13016722: image.png 

But as i have said in the patch's description, i stumbled into this when 
writing new code, where the effect is //much// larger.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-20 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 added a comment.

In D87972#2284060 , @MaskRay wrote:

> I have tested this patch internally and seen gains and losses. On one 
> document search related benchmark 3~5% improvement. One zippy (snappy) there 
> is 3~5% regression. Perhaps we do need a conditional extra SROA run.

Snappy  - you mean public https://github.com/google/snappy?

Well, it should be possible to analyze it...

@lebedev.ri any perf data from testsuite/rawspeed?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-20 Thread Roman Lebedev via Phabricator via cfe-commits
lebedev.ri added a comment.

(I'm guessing that we are talking about run-time performance here.)

In D87972#2284060 , @MaskRay wrote:

> I have tested this patch internally and seen gains and losses. On one
> document-search-related benchmark there is a 3~5% improvement. On one zippy
> (snappy) benchmark there is a 3~5% regression.

Yep, as usual.

> Perhaps we do need a conditional extra SROA run.

I think I don't understand the gist.

If we don't run it in the cases where we expect it wouldn't do anything,
it should still be run in the cases where it *does* do something,
so I'm not sure how conditioning its run helps with anything
(well, other than compile-time).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-20 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 added a comment.

In D87972#2284060 , @MaskRay wrote:

> I have tested this patch internally and seen gains and losses. On one
> document-search-related benchmark there is a 3~5% improvement. On one zippy
> (snappy) benchmark there is a 3~5% regression. Perhaps we do need a
> conditional extra SROA run.

Should be same story for NPM since NPM also enables SROA after unrolling.

A) Commit this patch and start working on general solution for LPM and NPM.

B) Ignore this patch. But after LLVM switches to NPM, you have same issue to 
solve anyway.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-20 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added a comment.

I have tested this patch internally and seen gains and losses. On one 
document-search-related benchmark there is a 3~5% improvement. On one zippy 
(snappy) benchmark there is a 3~5% regression. Perhaps we do need a 
conditional extra SROA run.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-20 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 added a comment.

https://reviews.llvm.org/D68593 added late SROA to NPM so it would be good to 
enable it for LPM as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-20 Thread Florian Hahn via Phabricator via cfe-commits
fhahn added a comment.

It is surprising that this causes only such a small perf regression. I guess it 
should be OK given that, but there probably are some pathological cases out 
there where this may cause some noticeable compile-time regressions.

IIUC the additional cases this catches come mainly from fully unrolled loops. 
If only we had a better way to conditionally run passes :) Then we would 
ideally only run SROA (and other additional simplification passes) late on 
functions that had loops fully unrolled. One lightweight way to do so would be 
to have loop unroll add an 'additional-simplification' attribute to functions 
that contain loops which it fully unrolled and have SROA just run again late if 
the attribute is present. A similar approach may be helpful in other places too 
(e.g. the off-by-default `-extra-vectorizer-passes` option, 
https://github.com/llvm/llvm-project/blob/master/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp#L774)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87972: [OldPM] Pass manager: run SROA after (simple) loop unrolling

2020-09-19 Thread Roman Lebedev via Phabricator via cfe-commits
lebedev.ri updated this revision to Diff 292984.
lebedev.ri added a comment.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Fixing a few clang tests and updating one more llvm test to check this also.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87972/new/

https://reviews.llvm.org/D87972

Files:
  clang/test/CodeGenCXX/union-tbaa2.cpp
  clang/test/Misc/loop-opt-setup.c
  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/test/Other/opt-O2-pipeline.ll
  llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
  llvm/test/Other/opt-O3-pipeline.ll
  llvm/test/Other/opt-Os-pipeline.ll
  llvm/test/Other/unroll-sroa.ll
  llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll

Index: llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
===
--- llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
@@ -22,53 +22,21 @@
 %"struct.std::array" = type { [6 x i32] }
 
 define dso_local void @_Z3fooi(i32 %cnt) {
-; OLDPM-LABEL: @_Z3fooi(
-; OLDPM-NEXT:  entry:
-; OLDPM-NEXT:[[ARR:%.*]] = alloca %"struct.std::array", align 16
-; OLDPM-NEXT:[[TMP0:%.*]] = bitcast %"struct.std::array"* [[ARR]] to i8*
-; OLDPM-NEXT:call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull [[TMP0]])
-; OLDPM-NEXT:[[ARRAYDECAY_I_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 0
-; OLDPM-NEXT:[[INCDEC_PTR:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 1
-; OLDPM-NEXT:[[INCDEC_PTR_1:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 2
-; OLDPM-NEXT:[[INCDEC_PTR_2:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 3
-; OLDPM-NEXT:[[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[CNT:%.*]], i32 0
-; OLDPM-NEXT:[[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
-; OLDPM-NEXT:[[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], 
-; OLDPM-NEXT:[[TMP4:%.*]] = bitcast %"struct.std::array"* [[ARR]] to <4 x i32>*
-; OLDPM-NEXT:store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
-; OLDPM-NEXT:[[INCDEC_PTR_3:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 4
-; OLDPM-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
-; OLDPM-NEXT:store i32 [[INC_4]], i32* [[INCDEC_PTR_3]], align 16
-; OLDPM-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
-; OLDPM-NEXT:[[TMP5:%.*]] = load i32, i32* [[ARRAYDECAY_I_I_I]], align 16
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP5]])
-; OLDPM-NEXT:[[TMP6:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP6]])
-; OLDPM-NEXT:[[TMP7:%.*]] = load i32, i32* [[INCDEC_PTR_1]], align 8
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP7]])
-; OLDPM-NEXT:[[TMP8:%.*]] = load i32, i32* [[INCDEC_PTR_2]], align 4
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP8]])
-; OLDPM-NEXT:[[TMP9:%.*]] = load i32, i32* [[INCDEC_PTR_3]], align 16
-; OLDPM-NEXT:call void @_Z3usei(i32 [[TMP9]])
-; OLDPM-NEXT:call void @_Z3usei(i32 [[INC_5]])
-; OLDPM-NEXT:call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[TMP0]])
-; OLDPM-NEXT:ret void
-;
-; NEWPM-LABEL: @_Z3fooi(
-; NEWPM-NEXT:  entry:
-; NEWPM-NEXT:[[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1
-; NEWPM-NEXT:[[INC_1:%.*]] = add nsw i32 [[CNT]], 2
-; NEWPM-NEXT:[[INC_2:%.*]] = add nsw i32 [[CNT]], 3
-; NEWPM-NEXT:[[INC_3:%.*]] = add nsw i32 [[CNT]], 4
-; NEWPM-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
-; NEWPM-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_1]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_2]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_3]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_4]])
-; NEWPM-NEXT:call void @_Z3usei(i32 [[INC_5]])
-; NEWPM-NEXT:ret void
+; CHECK-LABEL: @_Z3fooi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1
+; CHECK-NEXT:[[INC_1:%.*]] = add nsw i32 [[CNT]], 2
+; CHECK-NEXT:[[INC_2:%.*]] = add nsw i32 [[CNT]], 3
+; CHECK-NEXT:[[INC_3:%.*]] = add nsw i32 [[CNT]], 4
+; CHECK-NEXT:[[INC_4:%.*]] = add nsw i32 [[CNT]], 5
+; CHECK-NEXT:[[INC_5:%.*]] = add nsw i32 [[CNT]], 6
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_1]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_2]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_3]])
+; CHECK-NEXT:call void @_Z3usei(i32 [[INC_4]])
+; CHECK-NEXT:call void @_Z3usei(i32