[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-07 Thread JF Bastien via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL355660: Variable auto-init: split out small arrays (authored 
by jfb, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58885/new/

https://reviews.llvm.org/D58885

Files:
  cfe/trunk/lib/CodeGen/CGDecl.cpp
  cfe/trunk/test/CodeGenCXX/auto-var-init.cpp

Index: cfe/trunk/test/CodeGenCXX/auto-var-init.cpp
===
--- cfe/trunk/test/CodeGenCXX/auto-var-init.cpp
+++ cfe/trunk/test/CodeGenCXX/auto-var-init.cpp
@@ -129,7 +129,6 @@
 // PATTERN-O1-NOT: @__const.test_bool4_custom.custom
 // ZERO-O1-NOT: @__const.test_bool4_custom.custom
 
-// PATTERN: @__const.test_intptr4_uninit.uninit = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*)], align 16
 // PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16
 // ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16
 // PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }], align 16
@@ -1019,13 +1018,20 @@
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%custom)
 
 TEST_UNINIT(intptr4, int*[4]);
-// CHECK-LABEL: @test_intptr4_uninit()
-// CHECK:   %uninit = alloca [4 x i32*], align
-// CHECK-NEXT:  call void @{{.*}}used{{.*}}%uninit)
-// PATTERN-LABEL: @test_intptr4_uninit()
-// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit
-// ZERO-LABEL: @test_intptr4_uninit()
-// ZERO: call void @llvm.memset{{.*}}, i8 0,
+// CHECK-LABEL:  @test_intptr4_uninit()
+// CHECK:%uninit = alloca [4 x i32*], align
+// CHECK-NEXT:   call void @{{.*}}used{{.*}}%uninit)
+// PATTERN-O1-LABEL: @test_intptr4_uninit()
+// PATTERN-O1:   %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0
+// PATTERN-O1-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16
+// PATTERN-O1-NEXT:  %2 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 1
+// PATTERN-O1-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %2, align 8
+// PATTERN-O1-NEXT:  %3 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 2
+// PATTERN-O1-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %3, align 16
+// PATTERN-O1-NEXT:  %4 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 3
+// PATTERN-O1-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %4, align 8
+// ZERO-LABEL:   @test_intptr4_uninit()
+// ZERO: call void @llvm.memset{{.*}}, i8 0,
 
 TEST_BRACES(intptr4, int*[4]);
 // CHECK-LABEL: @test_intptr4_braces()
@@ -1124,7 +1130,7 @@
 // PATTERN-LABEL: @test_atomicnotlockfree_uninit()
 // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicnotlockfree_uninit.uninit
 // PATTERN-O1: bitcast
-// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32
+// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32
 // ZERO-LABEL: @test_atomicnotlockfree_uninit()
 // ZERO: call void @llvm.memset{{.*}}, i8 0,
 
Index: cfe/trunk/lib/CodeGen/CGDecl.cpp
===
--- cfe/trunk/lib/CodeGen/CGDecl.cpp
+++ cfe/trunk/lib/CodeGen/CGDecl.cpp
@@ -970,12 +970,12 @@
   return llvm::isBytewiseValue(Init);
 }
 
-/// Decide whether we want to split a constant structure store into a sequence
-/// of its fields' stores. This may cost us code size and compilation speed,
-/// but plays better with store optimizations.
-static bool shouldSplitStructStore(CodeGenModule &CGM,
-                                   uint64_t GlobalByteSize) {
-  // Don't break structures that occupy more than one cacheline.
+/// Decide whether we want to split a constant structure or array store into a
+/// sequence of its fields' stores. This may cost us code size and compilation
+/// speed, but plays better with store optimizations.
+static bool shouldSplitConstantStore(CodeGenModule &CGM,
+ uint64_t 

[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-04 Thread JF Bastien via Phabricator via cfe-commits
jfb marked an inline comment as done.
jfb added a comment.

Comparing clang stage2 in release mode, with and without this change, we see a 
408 byte size difference, which is ~nothing. Here's details, nothing surprising:

  $ /s/bloaty/bloaty -d sections /s/llvm1/llvm/stage2/bin/clang-9  -- 
/s/llvm2/llvm/stage2/bin/clang-9
   VM SIZE FILE SIZE
   --   --
 +44% +1.84Ki [__TEXT]  +1.84Ki   +45%
 +65%+408 [__LINKEDIT]0  [ = ]
-0.0%  -8 Table of Non-instructions  -8  -0.0%
-0.0% -48 Symbol Table  -48  -0.0%
-0.0%-120 Export Info  -120  -0.0%
-0.0%-232 String Table -232  -0.0%
-0.0%-832 __TEXT,__text-832  -0.0%
-0.0% -1.03Ki __TEXT,__cstring  -1.03Ki  -0.0%
[ = ]   0 TOTAL-408  -0.0%
  
  $ /s/bloaty/bloaty -d symbols /s/llvm1/llvm/stage2/bin/clang-9  -- 
/s/llvm2/llvm/stage2/bin/clang-9
   VM SIZE  
FILE SIZE
   --
--
 +45% +1.84Ki [__TEXT]   
+1.84Ki   +45%
+0.0%+280 [__LINKEDIT]  
-128  -0.0%
+0.4% +96 clang::ento::check::ASTDecl<>::_checkDecl<>() 
 +96  +0.4%
+2.9% +48 emitStoresForConstant()   
 +48  +2.9%
-0.9% -16 clang::Sema::DeclareGlobalAllocationFunction()
 -16  -0.9%
-0.0% -16 clang::ento::check::PostStmt<>::_checkStmt<>()
 -16  -0.0%
-8.3% -78 clang::AnalyzerOptions::getCheckerStringOption()  
 -78  -8.3%
   -12.5% -80 clang::ento::registerPaddingChecker() 
 -80 -12.5%
-6.3% -96 clang::ASTContext::GetBuiltinType()   
 -96  -6.3%
   -37.8%-205 clang::AnalyzerOptions::getCheckerIntegerOption() 
-205 -37.8%
   -49.5%-269 clang::AnalyzerOptions::getCheckerBooleanOption() 
-269 -49.5%
-0.0%-480 llvm::APInt::toString()::Digits   
-480  -0.0%
   -67.4%-496 clang::driver::toolchains::NetBSD::addLibCxxIncludePaths()
-496 -67.4%
-0.0%-576 [__TEXT,__cstring]
-576  -0.0%
[ = ]   0 TOTAL 
-408  -0.0%

For codebases with more small arrays this likely plays better, but at least we 
know it's not a regression.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58885/new/

https://reviews.llvm.org/D58885



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-04 Thread Alexander Potapenko via Phabricator via cfe-commits
glider accepted this revision.
glider added inline comments.
This revision is now accepted and ready to land.



Comment at: lib/CodeGen/CGDecl.cpp:1206
+  bool canDoSingleStore = Ty->isIntOrIntVectorTy() ||
+  Ty->isPtrOrPtrVectorTy() || Ty->isFPOrFPVectorTy();
+  if (canDoSingleStore) {

jfb wrote:
> glider wrote:
> > Is the second expression being moved to line 1206 a result of clang-format? 
> > Otherwise it'll migrate back at some point.
> Yes, the slightly longer name pushes it past 80 columns, and I just 
> auto-format stuff before uploading.
> 
> I can do this change separately, I just noticed that the name I originally 
> used was now misleading because vectors aren't scalars :)
Up to you, this doesn't matter IMO :)


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58885/new/

https://reviews.llvm.org/D58885



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-04 Thread JF Bastien via Phabricator via cfe-commits
jfb marked 3 inline comments as done.
jfb added inline comments.



Comment at: lib/CodeGen/CGDecl.cpp:1206
+  bool canDoSingleStore = Ty->isIntOrIntVectorTy() ||
+  Ty->isPtrOrPtrVectorTy() || Ty->isFPOrFPVectorTy();
+  if (canDoSingleStore) {

glider wrote:
> Is the second expression being moved to line 1206 a result of clang-format? 
> Otherwise it'll migrate back at some point.
Yes, the slightly longer name pushes it past 80 columns, and I just auto-format 
stuff before uploading.

I can do this change separately, I just noticed that the name I originally used 
was now misleading because vectors aren't scalars :)


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58885/new/

https://reviews.llvm.org/D58885



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-04 Thread Alexander Potapenko via Phabricator via cfe-commits
glider added a comment.

The change itself looks good.
It doesn't seem to regress kernel performance on ARM64. I haven't got to 
testing on x86 yet, but don't anticipate any problems either.




Comment at: lib/CodeGen/CGDecl.cpp:1206
+  bool canDoSingleStore = Ty->isIntOrIntVectorTy() ||
+  Ty->isPtrOrPtrVectorTy() || Ty->isFPOrFPVectorTy();
+  if (canDoSingleStore) {

Is the second expression being moved to line 1206 a result of clang-format? 
Otherwise it'll migrate back at some point.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58885/new/

https://reviews.llvm.org/D58885



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-04 Thread JF Bastien via Phabricator via cfe-commits
jfb updated this revision to Diff 189159.
jfb added a comment.

- Fix test labels.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58885/new/

https://reviews.llvm.org/D58885

Files:
  lib/CodeGen/CGDecl.cpp
  test/CodeGenCXX/auto-var-init.cpp

Index: test/CodeGenCXX/auto-var-init.cpp
===
--- test/CodeGenCXX/auto-var-init.cpp
+++ test/CodeGenCXX/auto-var-init.cpp
@@ -129,7 +129,6 @@
 // PATTERN-O1-NOT: @__const.test_bool4_custom.custom
 // ZERO-O1-NOT: @__const.test_bool4_custom.custom
 
-// PATTERN: @__const.test_intptr4_uninit.uninit = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*)], align 16
 // PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16
 // ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16
 // PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }], align 16
@@ -1019,13 +1018,20 @@
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%custom)
 
 TEST_UNINIT(intptr4, int*[4]);
-// CHECK-LABEL: @test_intptr4_uninit()
-// CHECK:   %uninit = alloca [4 x i32*], align
-// CHECK-NEXT:  call void @{{.*}}used{{.*}}%uninit)
-// PATTERN-LABEL: @test_intptr4_uninit()
-// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit
-// ZERO-LABEL: @test_intptr4_uninit()
-// ZERO: call void @llvm.memset{{.*}}, i8 0,
+// CHECK-LABEL:  @test_intptr4_uninit()
+// CHECK:%uninit = alloca [4 x i32*], align
+// CHECK-NEXT:   call void @{{.*}}used{{.*}}%uninit)
+// PATTERN-O1-LABEL: @test_intptr4_uninit()
+// PATTERN-O1:   %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0
+// PATTERN-O1-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16
+// PATTERN-O1-NEXT:  %2 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 1
+// PATTERN-O1-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %2, align 8
+// PATTERN-O1-NEXT:  %3 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 2
+// PATTERN-O1-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %3, align 16
+// PATTERN-O1-NEXT:  %4 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 3
+// PATTERN-O1-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %4, align 8
+// ZERO-LABEL:   @test_intptr4_uninit()
+// ZERO: call void @llvm.memset{{.*}}, i8 0,
 
 TEST_BRACES(intptr4, int*[4]);
 // CHECK-LABEL: @test_intptr4_braces()
@@ -1124,7 +1130,7 @@
 // PATTERN-LABEL: @test_atomicnotlockfree_uninit()
 // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicnotlockfree_uninit.uninit
 // PATTERN-O1: bitcast
-// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32
+// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32
 // ZERO-LABEL: @test_atomicnotlockfree_uninit()
 // ZERO: call void @llvm.memset{{.*}}, i8 0,
 
Index: lib/CodeGen/CGDecl.cpp
===
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -969,12 +969,12 @@
   return llvm::isBytewiseValue(Init);
 }
 
-/// Decide whether we want to split a constant structure store into a sequence
-/// of its fields' stores. This may cost us code size and compilation speed,
-/// but plays better with store optimizations.
-static bool shouldSplitStructStore(CodeGenModule &CGM,
-                                   uint64_t GlobalByteSize) {
-  // Don't break structures that occupy more than one cacheline.
+/// Decide whether we want to split a constant structure or array store into a
+/// sequence of its fields' stores. This may cost us code size and compilation
+/// speed, but plays better with store optimizations.
+static bool shouldSplitConstantStore(CodeGenModule &CGM,
+                                     uint64_t GlobalByteSize) {
+  // Don't break things that occupy more than one cacheline.
   uint64_t ByteSizeLimit = 64;
   if (CGM.getCodeGenOpts().OptimizationLevel == 0)
 return false;
@@ -1202,9 +1202,9 @@
   CGBuilderTy ,
 

[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-04 Thread JF Bastien via Phabricator via cfe-commits
jfb marked 2 inline comments as done.
jfb added inline comments.



Comment at: test/CodeGenCXX/auto-var-init.cpp:1025
 // PATTERN-LABEL: @test_intptr4_uninit()
-// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit
-// ZERO-LABEL: @test_intptr4_uninit()
-// ZERO: call void @llvm.memset{{.*}}, i8 0,
+// PATTERN:   %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, 
i64 0, i64 0
+// PATTERN-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), 
i32** %1, align 16

glider wrote:
> This check fails for me locally.
Apologies, I played around with the labels and forgot to fix them before 
sending the patch.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58885/new/

https://reviews.llvm.org/D58885



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-04 Thread Alexander Potapenko via Phabricator via cfe-commits
glider added inline comments.



Comment at: test/CodeGenCXX/auto-var-init.cpp:1025
 // PATTERN-LABEL: @test_intptr4_uninit()
-// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit
-// ZERO-LABEL: @test_intptr4_uninit()
-// ZERO: call void @llvm.memset{{.*}}, i8 0,
+// PATTERN:   %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, 
i64 0, i64 0
+// PATTERN-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), 
i32** %1, align 16

This check fails for me locally.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58885/new/

https://reviews.llvm.org/D58885



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-03 Thread JF Bastien via Phabricator via cfe-commits
jfb updated this revision to Diff 189085.
jfb marked an inline comment as done.
jfb added a comment.

- typo


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58885/new/

https://reviews.llvm.org/D58885

Files:
  lib/CodeGen/CGDecl.cpp
  test/CodeGenCXX/auto-var-init.cpp

Index: test/CodeGenCXX/auto-var-init.cpp
===
--- test/CodeGenCXX/auto-var-init.cpp
+++ test/CodeGenCXX/auto-var-init.cpp
@@ -129,7 +129,6 @@
 // PATTERN-O1-NOT: @__const.test_bool4_custom.custom
 // ZERO-O1-NOT: @__const.test_bool4_custom.custom
 
-// PATTERN: @__const.test_intptr4_uninit.uninit = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*)], align 16
 // PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16
 // ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16
 // PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }], align 16
@@ -1019,13 +1018,20 @@
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%custom)
 
 TEST_UNINIT(intptr4, int*[4]);
-// CHECK-LABEL: @test_intptr4_uninit()
-// CHECK:   %uninit = alloca [4 x i32*], align
-// CHECK-NEXT:  call void @{{.*}}used{{.*}}%uninit)
+// CHECK-LABEL:   @test_intptr4_uninit()
+// CHECK: %uninit = alloca [4 x i32*], align
+// CHECK-NEXT:call void @{{.*}}used{{.*}}%uninit)
 // PATTERN-LABEL: @test_intptr4_uninit()
-// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit
-// ZERO-LABEL: @test_intptr4_uninit()
-// ZERO: call void @llvm.memset{{.*}}, i8 0,
+// PATTERN:   %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0
+// PATTERN-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16
+// PATTERN-NEXT:  %2 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 1
+// PATTERN-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %2, align 8
+// PATTERN-NEXT:  %3 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 2
+// PATTERN-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %3, align 16
+// PATTERN-NEXT:  %4 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 3
+// PATTERN-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %4, align 8
+// ZERO-LABEL:@test_intptr4_uninit()
+// ZERO: call void @llvm.memset{{.*}}, i8 0,
 
 TEST_BRACES(intptr4, int*[4]);
 // CHECK-LABEL: @test_intptr4_braces()
@@ -1124,7 +1130,7 @@
 // PATTERN-LABEL: @test_atomicnotlockfree_uninit()
 // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicnotlockfree_uninit.uninit
 // PATTERN-O1: bitcast
-// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32
+// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32
 // ZERO-LABEL: @test_atomicnotlockfree_uninit()
 // ZERO: call void @llvm.memset{{.*}}, i8 0,
 
Index: lib/CodeGen/CGDecl.cpp
===
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -969,12 +969,12 @@
   return llvm::isBytewiseValue(Init);
 }
 
-/// Decide whether we want to split a constant structure store into a sequence
-/// of its fields' stores. This may cost us code size and compilation speed,
-/// but plays better with store optimizations.
-static bool shouldSplitStructStore(CodeGenModule &CGM,
-                                   uint64_t GlobalByteSize) {
-  // Don't break structures that occupy more than one cacheline.
+/// Decide whether we want to split a constant structure or array store into a
+/// sequence of its fields' stores. This may cost us code size and compilation
+/// speed, but plays better with store optimizations.
+static bool shouldSplitConstantStore(CodeGenModule &CGM,
+                                     uint64_t GlobalByteSize) {
+  // Don't break things that occupy more than one cacheline.
   uint64_t ByteSizeLimit = 64;
   if (CGM.getCodeGenOpts().OptimizationLevel == 0)
 return false;
@@ -1202,9 +1202,9 @@
   CGBuilderTy ,
   llvm::Constant *constant) {
   

[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-03 Thread JF Bastien via Phabricator via cfe-commits
jfb added a comment.

I'll do a few size diffs to double-check that this also pays off. @glider can 
you also check that it doesn't regress what you've been looking at?




Comment at: test/CodeGenCXX/auto-var-init.cpp:1133
 // PATTERN-O1: bitcast
-// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32
+// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32
 // ZERO-LABEL: @test_atomicnotlockfree_uninit()

This was a typo, inadvertently capturing `%0`.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58885/new/

https://reviews.llvm.org/D58885



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58885: Variable auto-init: split out small arrays

2019-03-03 Thread JF Bastien via Phabricator via cfe-commits
jfb created this revision.
jfb added reviewers: glider, pcc, kcc, rjmccall.
Herald added subscribers: cfe-commits, jdoerfert, dexonsmith, jkorous.
Herald added a project: clang.

Following up with r355181, initialize small arrays as well.


Repository:
  rC Clang

https://reviews.llvm.org/D58885

Files:
  lib/CodeGen/CGDecl.cpp
  test/CodeGenCXX/auto-var-init.cpp

Index: test/CodeGenCXX/auto-var-init.cpp
===
--- test/CodeGenCXX/auto-var-init.cpp
+++ test/CodeGenCXX/auto-var-init.cpp
@@ -129,7 +129,6 @@
 // PATTERN-O1-NOT: @__const.test_bool4_custom.custom
 // ZERO-O1-NOT: @__const.test_bool4_custom.custom
 
-// PATTERN: @__const.test_intptr4_uninit.uninit = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*)], align 16
 // PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16
 // ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16
 // PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }], align 16
@@ -1019,13 +1018,20 @@
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%custom)
 
 TEST_UNINIT(intptr4, int*[4]);
-// CHECK-LABEL: @test_intptr4_uninit()
-// CHECK:   %uninit = alloca [4 x i32*], align
-// CHECK-NEXT:  call void @{{.*}}used{{.*}}%uninit)
+// CHECK-LABEL:   @test_intptr4_uninit()
+// CHECK: %uninit = alloca [4 x i32*], align
+// CHECK-NEXT:call void @{{.*}}used{{.*}}%uninit)
 // PATTERN-LABEL: @test_intptr4_uninit()
-// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit
-// ZERO-LABEL: @test_intptr4_uninit()
-// ZERO: call void @llvm.memset{{.*}}, i8 0,
+// PATTERN:   %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0
+// PATTERN-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16
+// PATTERN-NEXT:  %2 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 1
+// PATTERN-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %2, align 8
+// PATTERN-NEXT:  %3 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 2
+// PATTERN-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %3, align 16
+// PATTERN-NEXT:  %4 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 3
+// PATTERN-NEXT:  store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %4, align 8
+// ZERO-LABEL:@test_intptr4_uninit()
+// ZERO: call void @llvm.memset{{.*}}, i8 0,
 
 TEST_BRACES(intptr4, int*[4]);
 // CHECK-LABEL: @test_intptr4_braces()
@@ -1124,7 +1130,7 @@
 // PATTERN-LABEL: @test_atomicnotlockfree_uninit()
 // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicnotlockfree_uninit.uninit
 // PATTERN-O1: bitcast
-// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32
+// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32
 // ZERO-LABEL: @test_atomicnotlockfree_uninit()
 // ZERO: call void @llvm.memset{{.*}}, i8 0,
 
Index: lib/CodeGen/CGDecl.cpp
===
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -969,12 +969,12 @@
   return llvm::isBytewiseValue(Init);
 }
 
-/// Decide whether we want to split a constant structure store into a sequence
-/// of its fields' stores. This may cost us code size and compilation speed,
-/// but plays better with store optimizations.
-static bool shouldSplitStructStore(CodeGenModule &CGM,
-                                   uint64_t GlobalByteSize) {
-  // Don't break structures that occupy more than one cacheline.
+/// Decide whether we want to split a constant structure or array store into a
+/// sequence of its fields' stores. This may cost us code size and compilation
+/// speed, but plays better with store optimizations.
+static bool shouldSplitConstantStore(CodeGenModule &CGM,
+                                     uint64_t GlobalByteSize) {
+  // Don't break things that occupy more than one cacheline.
   uint64_t ByteSizeLimit = 64;
   if (CGM.getCodeGenOpts().OptimizationLevel == 0)
 return false;
@@ -1202,9 +1202,9 @@
   CGBuilderTy ,