[PATCH] D58885: Variable auto-init: split out small arrays
This revision was automatically updated to reflect the committed changes. Closed by commit rL355660: Variable auto-init: split out small arrays (authored by jfb, committed by ). Herald added a project: LLVM. Herald added a subscriber: llvm-commits. Repository: rL LLVM CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58885/new/ https://reviews.llvm.org/D58885 Files: cfe/trunk/lib/CodeGen/CGDecl.cpp cfe/trunk/test/CodeGenCXX/auto-var-init.cpp Index: cfe/trunk/test/CodeGenCXX/auto-var-init.cpp === --- cfe/trunk/test/CodeGenCXX/auto-var-init.cpp +++ cfe/trunk/test/CodeGenCXX/auto-var-init.cpp @@ -129,7 +129,6 @@ // PATTERN-O1-NOT: @__const.test_bool4_custom.custom // ZERO-O1-NOT: @__const.test_bool4_custom.custom -// PATTERN: @__const.test_intptr4_uninit.uninit = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*)], align 16 // PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16 // ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16 // PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }], align 16 @@ -1019,13 +1018,20 @@ // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) 
TEST_UNINIT(intptr4, int*[4]); -// CHECK-LABEL: @test_intptr4_uninit() -// CHECK: %uninit = alloca [4 x i32*], align -// CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) -// PATTERN-LABEL: @test_intptr4_uninit() -// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit -// ZERO-LABEL: @test_intptr4_uninit() -// ZERO: call void @llvm.memset{{.*}}, i8 0, +// CHECK-LABEL: @test_intptr4_uninit() +// CHECK:%uninit = alloca [4 x i32*], align +// CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) +// PATTERN-O1-LABEL: @test_intptr4_uninit() +// PATTERN-O1: %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16 +// PATTERN-O1-NEXT: %2 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 1 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %2, align 8 +// PATTERN-O1-NEXT: %3 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 2 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %3, align 16 +// PATTERN-O1-NEXT: %4 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 3 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %4, align 8 +// ZERO-LABEL: @test_intptr4_uninit() +// ZERO: call void @llvm.memset{{.*}}, i8 0, TEST_BRACES(intptr4, int*[4]); // CHECK-LABEL: @test_intptr4_braces() @@ -1124,7 +1130,7 @@ // PATTERN-LABEL: @test_atomicnotlockfree_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicnotlockfree_uninit.uninit // PATTERN-O1: bitcast -// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32 +// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32 // ZERO-LABEL: @test_atomicnotlockfree_uninit() // ZERO: call void @llvm.memset{{.*}}, i8 0, Index: cfe/trunk/lib/CodeGen/CGDecl.cpp === --- cfe/trunk/lib/CodeGen/CGDecl.cpp +++ 
cfe/trunk/lib/CodeGen/CGDecl.cpp @@ -970,12 +970,12 @@ return llvm::isBytewiseValue(Init); } -/// Decide whether we want to split a constant structure store into a sequence -/// of its fields' stores. This may cost us code size and compilation speed, -/// but plays better with store optimizations. -static bool shouldSplitStructStore(CodeGenModule &CGM, - uint64_t GlobalByteSize) { - // Don't break structures that occupy more than one cacheline. +/// Decide whether we want to split a constant structure or array store into a +/// sequence of its fields' stores. This may cost us code size and compilation +/// speed, but plays better with store optimizations. +static bool shouldSplitConstantStore(CodeGenModule &CGM, + uint64_t
[PATCH] D58885: Variable auto-init: split out small arrays
jfb marked an inline comment as done. jfb added a comment. Comparing clang stage2 in release mode, with and without this change, we see a 408 byte size difference, which is ~nothing. Here are the details, nothing surprising: $ /s/bloaty/bloaty -d sections /s/llvm1/llvm/stage2/bin/clang-9 -- /s/llvm2/llvm/stage2/bin/clang-9 VM SIZE FILE SIZE -- -- +44% +1.84Ki [__TEXT] +1.84Ki +45% +65%+408 [__LINKEDIT]0 [ = ] -0.0% -8 Table of Non-instructions -8 -0.0% -0.0% -48 Symbol Table -48 -0.0% -0.0%-120 Export Info -120 -0.0% -0.0%-232 String Table -232 -0.0% -0.0%-832 __TEXT,__text-832 -0.0% -0.0% -1.03Ki __TEXT,__cstring -1.03Ki -0.0% [ = ] 0 TOTAL-408 -0.0% $ /s/bloaty/bloaty -d symbols /s/llvm1/llvm/stage2/bin/clang-9 -- /s/llvm2/llvm/stage2/bin/clang-9 VM SIZE FILE SIZE -- -- +45% +1.84Ki [__TEXT] +1.84Ki +45% +0.0%+280 [__LINKEDIT] -128 -0.0% +0.4% +96 clang::ento::check::ASTDecl<>::_checkDecl<>() +96 +0.4% +2.9% +48 emitStoresForConstant() +48 +2.9% -0.9% -16 clang::Sema::DeclareGlobalAllocationFunction() -16 -0.9% -0.0% -16 clang::ento::check::PostStmt<>::_checkStmt<>() -16 -0.0% -8.3% -78 clang::AnalyzerOptions::getCheckerStringOption() -78 -8.3% -12.5% -80 clang::ento::registerPaddingChecker() -80 -12.5% -6.3% -96 clang::ASTContext::GetBuiltinType() -96 -6.3% -37.8%-205 clang::AnalyzerOptions::getCheckerIntegerOption() -205 -37.8% -49.5%-269 clang::AnalyzerOptions::getCheckerBooleanOption() -269 -49.5% -0.0%-480 llvm::APInt::toString()::Digits -480 -0.0% -67.4%-496 clang::driver::toolchains::NetBSD::addLibCxxIncludePaths() -496 -67.4% -0.0%-576 [__TEXT,__cstring] -576 -0.0% [ = ] 0 TOTAL -408 -0.0% For codebases with more small arrays this likely plays better, but at least we know it's not a regression. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58885/new/ https://reviews.llvm.org/D58885 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D58885: Variable auto-init: split out small arrays
glider accepted this revision. glider added inline comments. This revision is now accepted and ready to land. Comment at: lib/CodeGen/CGDecl.cpp:1206 + bool canDoSingleStore = Ty->isIntOrIntVectorTy() || + Ty->isPtrOrPtrVectorTy() || Ty->isFPOrFPVectorTy(); + if (canDoSingleStore) { jfb wrote: > glider wrote: > > Is the second expression being moved to line 1206 a result of clang-format? > > Otherwise it'll migrate back at some point. > Yes, the slightly longer name pushes it past 80 columns, and I just > auto-format stuff before uploading. > > I can do this change separately, I just noticed that the name I originally > used was now misleading because vectors aren't scalars :) Up to you, this doesn't matter IMO :) Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58885/new/ https://reviews.llvm.org/D58885 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D58885: Variable auto-init: split out small arrays
jfb marked 3 inline comments as done. jfb added inline comments. Comment at: lib/CodeGen/CGDecl.cpp:1206 + bool canDoSingleStore = Ty->isIntOrIntVectorTy() || + Ty->isPtrOrPtrVectorTy() || Ty->isFPOrFPVectorTy(); + if (canDoSingleStore) { glider wrote: > Is the second expression being moved to line 1206 a result of clang-format? > Otherwise it'll migrate back at some point. Yes, the slightly longer name pushes it past 80 columns, and I just auto-format stuff before uploading. I can do this change separately, I just noticed that the name I originally used was now misleading because vectors aren't scalars :) Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58885/new/ https://reviews.llvm.org/D58885 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D58885: Variable auto-init: split out small arrays
glider added a comment. The change itself looks good. It doesn't seem to regress kernel performance on ARM64. I haven't got to testing on x86 yet, but don't anticipate any problems either. Comment at: lib/CodeGen/CGDecl.cpp:1206 + bool canDoSingleStore = Ty->isIntOrIntVectorTy() || + Ty->isPtrOrPtrVectorTy() || Ty->isFPOrFPVectorTy(); + if (canDoSingleStore) { Is the second expression being moved to line 1206 a result of clang-format? Otherwise it'll migrate back at some point. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58885/new/ https://reviews.llvm.org/D58885 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D58885: Variable auto-init: split out small arrays
jfb updated this revision to Diff 189159. jfb added a comment. - Fix test labels. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58885/new/ https://reviews.llvm.org/D58885 Files: lib/CodeGen/CGDecl.cpp test/CodeGenCXX/auto-var-init.cpp Index: test/CodeGenCXX/auto-var-init.cpp === --- test/CodeGenCXX/auto-var-init.cpp +++ test/CodeGenCXX/auto-var-init.cpp @@ -129,7 +129,6 @@ // PATTERN-O1-NOT: @__const.test_bool4_custom.custom // ZERO-O1-NOT: @__const.test_bool4_custom.custom -// PATTERN: @__const.test_intptr4_uninit.uninit = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*)], align 16 // PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16 // ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16 // PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }], align 16 @@ -1019,13 +1018,20 @@ // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) TEST_UNINIT(intptr4, int*[4]); -// CHECK-LABEL: @test_intptr4_uninit() -// CHECK: %uninit = alloca [4 x i32*], align -// CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) -// PATTERN-LABEL: @test_intptr4_uninit() -// PATTERN: 
call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit -// ZERO-LABEL: @test_intptr4_uninit() -// ZERO: call void @llvm.memset{{.*}}, i8 0, +// CHECK-LABEL: @test_intptr4_uninit() +// CHECK:%uninit = alloca [4 x i32*], align +// CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) +// PATTERN-O1-LABEL: @test_intptr4_uninit() +// PATTERN-O1: %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16 +// PATTERN-O1-NEXT: %2 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 1 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %2, align 8 +// PATTERN-O1-NEXT: %3 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 2 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %3, align 16 +// PATTERN-O1-NEXT: %4 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 3 +// PATTERN-O1-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %4, align 8 +// ZERO-LABEL: @test_intptr4_uninit() +// ZERO: call void @llvm.memset{{.*}}, i8 0, TEST_BRACES(intptr4, int*[4]); // CHECK-LABEL: @test_intptr4_braces() @@ -1124,7 +1130,7 @@ // PATTERN-LABEL: @test_atomicnotlockfree_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicnotlockfree_uninit.uninit // PATTERN-O1: bitcast -// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32 +// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32 // ZERO-LABEL: @test_atomicnotlockfree_uninit() // ZERO: call void @llvm.memset{{.*}}, i8 0, Index: lib/CodeGen/CGDecl.cpp === --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -969,12 +969,12 @@ return llvm::isBytewiseValue(Init); } -/// Decide whether we want to split a constant structure store into a sequence -/// of its fields' stores. 
This may cost us code size and compilation speed, -/// but plays better with store optimizations. -static bool shouldSplitStructStore(CodeGenModule &CGM, - uint64_t GlobalByteSize) { - // Don't break structures that occupy more than one cacheline. +/// Decide whether we want to split a constant structure or array store into a +/// sequence of its fields' stores. This may cost us code size and compilation +/// speed, but plays better with store optimizations. +static bool shouldSplitConstantStore(CodeGenModule &CGM, + uint64_t GlobalByteSize) { + // Don't break things that occupy more than one cacheline. uint64_t ByteSizeLimit = 64; if (CGM.getCodeGenOpts().OptimizationLevel == 0) return false; @@ -1202,9 +1202,9 @@ CGBuilderTy &Builder,
[PATCH] D58885: Variable auto-init: split out small arrays
jfb marked 2 inline comments as done. jfb added inline comments. Comment at: test/CodeGenCXX/auto-var-init.cpp:1025 // PATTERN-LABEL: @test_intptr4_uninit() -// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit -// ZERO-LABEL: @test_intptr4_uninit() -// ZERO: call void @llvm.memset{{.*}}, i8 0, +// PATTERN: %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0 +// PATTERN-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16 glider wrote: > This check fails for me locally. Apologies, I played around with the labels and forgot to fix them before sending the patch. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58885/new/ https://reviews.llvm.org/D58885 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D58885: Variable auto-init: split out small arrays
glider added inline comments. Comment at: test/CodeGenCXX/auto-var-init.cpp:1025 // PATTERN-LABEL: @test_intptr4_uninit() -// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit -// ZERO-LABEL: @test_intptr4_uninit() -// ZERO: call void @llvm.memset{{.*}}, i8 0, +// PATTERN: %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0 +// PATTERN-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16 This check fails for me locally. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58885/new/ https://reviews.llvm.org/D58885 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D58885: Variable auto-init: split out small arrays
jfb updated this revision to Diff 189085. jfb marked an inline comment as done. jfb added a comment. - typo Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58885/new/ https://reviews.llvm.org/D58885 Files: lib/CodeGen/CGDecl.cpp test/CodeGenCXX/auto-var-init.cpp Index: test/CodeGenCXX/auto-var-init.cpp === --- test/CodeGenCXX/auto-var-init.cpp +++ test/CodeGenCXX/auto-var-init.cpp @@ -129,7 +129,6 @@ // PATTERN-O1-NOT: @__const.test_bool4_custom.custom // ZERO-O1-NOT: @__const.test_bool4_custom.custom -// PATTERN: @__const.test_intptr4_uninit.uninit = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*)], align 16 // PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16 // ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16 // PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }], align 16 @@ -1019,13 +1018,20 @@ // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) TEST_UNINIT(intptr4, int*[4]); -// CHECK-LABEL: @test_intptr4_uninit() -// CHECK: %uninit = alloca [4 x i32*], align -// CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) +// CHECK-LABEL: 
@test_intptr4_uninit() +// CHECK: %uninit = alloca [4 x i32*], align +// CHECK-NEXT:call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_intptr4_uninit() -// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit -// ZERO-LABEL: @test_intptr4_uninit() -// ZERO: call void @llvm.memset{{.*}}, i8 0, +// PATTERN: %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0 +// PATTERN-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16 +// PATTERN-NEXT: %2 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 1 +// PATTERN-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %2, align 8 +// PATTERN-NEXT: %3 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 2 +// PATTERN-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %3, align 16 +// PATTERN-NEXT: %4 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 3 +// PATTERN-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %4, align 8 +// ZERO-LABEL:@test_intptr4_uninit() +// ZERO: call void @llvm.memset{{.*}}, i8 0, TEST_BRACES(intptr4, int*[4]); // CHECK-LABEL: @test_intptr4_braces() @@ -1124,7 +1130,7 @@ // PATTERN-LABEL: @test_atomicnotlockfree_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicnotlockfree_uninit.uninit // PATTERN-O1: bitcast -// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32 +// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32 // ZERO-LABEL: @test_atomicnotlockfree_uninit() // ZERO: call void @llvm.memset{{.*}}, i8 0, Index: lib/CodeGen/CGDecl.cpp === --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -969,12 +969,12 @@ return llvm::isBytewiseValue(Init); } -/// Decide whether we want to split a constant structure store into a sequence -/// of its fields' stores. This may cost us code size and compilation speed, -/// but plays better with store optimizations. 
-static bool shouldSplitStructStore(CodeGenModule &CGM, - uint64_t GlobalByteSize) { - // Don't break structures that occupy more than one cacheline. +/// Decide whether we want to split a constant structure or array store into a +/// sequence of its fields' stores. This may cost us code size and compilation +/// speed, but plays better with store optimizations. +static bool shouldSplitConstantStore(CodeGenModule &CGM, + uint64_t GlobalByteSize) { + // Don't break things that occupy more than one cacheline. uint64_t ByteSizeLimit = 64; if (CGM.getCodeGenOpts().OptimizationLevel == 0) return false; @@ -1202,9 +1202,9 @@ CGBuilderTy &Builder, llvm::Constant *constant) {
[PATCH] D58885: Variable auto-init: split out small arrays
jfb added a comment. I'll do a few size diffs to double-check that this also pays off. @glider can you also check that it doesn't regress what you've been looking at? Comment at: test/CodeGenCXX/auto-var-init.cpp:1133 // PATTERN-O1: bitcast -// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32 +// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32 // ZERO-LABEL: @test_atomicnotlockfree_uninit() This was a typo, inadvertently capturing `%0`. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58885/new/ https://reviews.llvm.org/D58885 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D58885: Variable auto-init: split out small arrays
jfb created this revision. jfb added reviewers: glider, pcc, kcc, rjmccall. Herald added subscribers: cfe-commits, jdoerfert, dexonsmith, jkorous. Herald added a project: clang. Following up with r355181, initialize small arrays as well. Repository: rC Clang https://reviews.llvm.org/D58885 Files: lib/CodeGen/CGDecl.cpp test/CodeGenCXX/auto-var-init.cpp Index: test/CodeGenCXX/auto-var-init.cpp === --- test/CodeGenCXX/auto-var-init.cpp +++ test/CodeGenCXX/auto-var-init.cpp @@ -129,7 +129,6 @@ // PATTERN-O1-NOT: @__const.test_bool4_custom.custom // ZERO-O1-NOT: @__const.test_bool4_custom.custom -// PATTERN: @__const.test_intptr4_uninit.uninit = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*), i32* inttoptr (i64 -6148914691236517206 to i32*)], align 16 // PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16 // ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x i32*] [i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*), i32* inttoptr (i64 572662306 to i32*)], align 16 // PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }, { i16, i8, [1 x i8] } { i16 -21846, i8 -86, [1 x i8] c"\AA" }], align 16 @@ -1019,13 +1018,20 @@ // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) TEST_UNINIT(intptr4, int*[4]); -// CHECK-LABEL: @test_intptr4_uninit() -// CHECK: %uninit = alloca [4 x i32*], align -// CHECK-NEXT: 
call void @{{.*}}used{{.*}}%uninit) +// CHECK-LABEL: @test_intptr4_uninit() +// CHECK: %uninit = alloca [4 x i32*], align +// CHECK-NEXT:call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_intptr4_uninit() -// PATTERN: call void @llvm.memcpy{{.*}} @__const.test_intptr4_uninit.uninit -// ZERO-LABEL: @test_intptr4_uninit() -// ZERO: call void @llvm.memset{{.*}}, i8 0, +// PATTERN: %1 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 0 +// PATTERN-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %1, align 16 +// PATTERN-NEXT: %2 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 1 +// PATTERN-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %2, align 8 +// PATTERN-NEXT: %3 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 2 +// PATTERN-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %3, align 16 +// PATTERN-NEXT: %4 = getelementptr inbounds [4 x i32*], [4 x i32*]* %uninit, i64 0, i64 3 +// PATTERN-NEXT: store i32* inttoptr (i64 -6148914691236517206 to i32*), i32** %4, align 8 +// ZERO-LABEL:@test_intptr4_uninit() +// ZERO: call void @llvm.memset{{.*}}, i8 0, TEST_BRACES(intptr4, int*[4]); // CHECK-LABEL: @test_intptr4_braces() @@ -1124,7 +1130,7 @@ // PATTERN-LABEL: @test_atomicnotlockfree_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicnotlockfree_uninit.uninit // PATTERN-O1: bitcast -// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}0, i8 -86, i64 32 +// PATTERN-O1: call void @llvm.memset{{.*}}({{.*}}, i8 -86, i64 32 // ZERO-LABEL: @test_atomicnotlockfree_uninit() // ZERO: call void @llvm.memset{{.*}}, i8 0, Index: lib/CodeGen/CGDecl.cpp === --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -969,12 +969,12 @@ return llvm::isBytewiseValue(Init); } -/// Decide whether we want to split a constant structure store into a sequence -/// of its fields' stores. 
This may cost us code size and compilation speed, -/// but plays better with store optimizations. -static bool shouldSplitStructStore(CodeGenModule &CGM, - uint64_t GlobalByteSize) { - // Don't break structures that occupy more than one cacheline. +/// Decide whether we want to split a constant structure or array store into a +/// sequence of its fields' stores. This may cost us code size and compilation +/// speed, but plays better with store optimizations. +static bool shouldSplitConstantStore(CodeGenModule &CGM, + uint64_t GlobalByteSize) { + // Don't break things that occupy more than one cacheline. uint64_t ByteSizeLimit = 64; if (CGM.getCodeGenOpts().OptimizationLevel == 0) return false; @@ -1202,9 +1202,9 @@ CGBuilderTy &Builder,