[clang] Revert "[clang] Avoid memcopy for small structure with padding under … (PR #84230)

2024-03-06 Thread Antonio Frighetto via cfe-commits

https://github.com/antoniofrighetto updated 
https://github.com/llvm/llvm-project/pull/84230

>From 91ca7b2e5c98a7caa8a97f05f57e84f68d861fa3 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto 
Date: Wed, 6 Mar 2024 23:49:40 +0100
Subject: [PATCH] [clang][CodeGen] Allow memcpy replace with trivial auto var
 init

Fixes: https://github.com/llvm/llvm-project/issues/84178.
---
 clang/lib/CodeGen/CGDecl.cpp  | 43 ++-
 clang/test/CodeGen/aapcs-align.cpp|  4 +--
 clang/test/CodeGen/aapcs64-align.cpp  |  8 ++---
 clang/test/CodeGen/attr-counted-by.c  | 26 --
 clang/test/CodeGenCXX/auto-var-init.cpp   |  6 ++--
 clang/test/CodeGenOpenCL/amdgpu-printf.cl |  9 +
 clang/test/OpenMP/bug54082.c  |  4 +--
 7 files changed, 43 insertions(+), 57 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244..aa9997b87ecfa7 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1241,27 +1241,38 @@ static void emitStoresForConstant(CodeGenModule , 
const VarDecl ,
 return;
   }
 
-  // If the initializer is small, use a handful of stores.
+  // If the initializer is small or trivialAutoVarInit is set, use a handful of
+  // stores.
+  bool IsTrivialAutoVarInitPattern =
+  CGM.getContext().getLangOpts().getTrivialAutoVarInit() ==
+  LangOptions::TrivialAutoVarInitKind::Pattern;
   if (shouldSplitConstantStore(CGM, ConstantSize)) {
 if (auto *STy = dyn_cast(Ty)) {
-  const llvm::StructLayout *Layout =
-  CGM.getDataLayout().getStructLayout(STy);
-  for (unsigned i = 0; i != constant->getNumOperands(); i++) {
-CharUnits CurOff = 
CharUnits::fromQuantity(Layout->getElementOffset(i));
-Address EltPtr = Builder.CreateConstInBoundsByteGEP(
-Loc.withElementType(CGM.Int8Ty), CurOff);
-emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
-  constant->getAggregateElement(i), IsAutoInit);
+  if (STy == Loc.getElementType() ||
+  (STy != Loc.getElementType() && IsTrivialAutoVarInitPattern)) {
+const llvm::StructLayout *Layout =
+CGM.getDataLayout().getStructLayout(STy);
+for (unsigned i = 0; i != constant->getNumOperands(); i++) {
+  CharUnits CurOff =
+  CharUnits::fromQuantity(Layout->getElementOffset(i));
+  Address EltPtr = Builder.CreateConstInBoundsByteGEP(
+  Loc.withElementType(CGM.Int8Ty), CurOff);
+  emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
+constant->getAggregateElement(i), IsAutoInit);
+}
+return;
   }
-  return;
 } else if (auto *ATy = dyn_cast(Ty)) {
-  for (unsigned i = 0; i != ATy->getNumElements(); i++) {
-Address EltPtr = Builder.CreateConstGEP(
-Loc.withElementType(ATy->getElementType()), i);
-emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
-  constant->getAggregateElement(i), IsAutoInit);
+  if (ATy == Loc.getElementType() ||
+  (ATy != Loc.getElementType() && IsTrivialAutoVarInitPattern)) {
+for (unsigned i = 0; i != ATy->getNumElements(); i++) {
+  Address EltPtr = Builder.CreateConstGEP(
+  Loc.withElementType(ATy->getElementType()), i);
+  emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
+constant->getAggregateElement(i), IsAutoInit);
+}
+return;
   }
-  return;
 }
   }
 
diff --git a/clang/test/CodeGen/aapcs-align.cpp 
b/clang/test/CodeGen/aapcs-align.cpp
index 2886a32974b066..4f393d9e6b7f32 100644
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -134,8 +134,8 @@ void g6() {
   f6m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g6
-// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
-// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 
noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
+// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 
undef])
+// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 
noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 undef])
 // CHECK: declare void @f6(i32 noundef, [4 x i32])
 // CHECK: declare void @f6m(i32 noundef, i32 noundef, i32 noundef, i32 
noundef, i32 noundef, [4 x i32])
 }
diff --git a/clang/test/CodeGen/aapcs64-align.cpp 
b/clang/test/CodeGen/aapcs64-align.cpp
index 759413cbc4b56f..de231f2123b975 100644
--- a/clang/test/CodeGen/aapcs64-align.cpp
+++ b/clang/test/CodeGen/aapcs64-align.cpp
@@ -75,8 +75,8 @@ void g4() {
   f4m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g4()
-// CHECK: call void @f4(i32 noundef 1, [2 x i64] %{{.*}})
-// CHECK: void @f4m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 
4, 

[clang] Revert "[clang] Avoid memcopy for small structure with padding under … (PR #84230)

2024-03-06 Thread Nikita Popov via cfe-commits

nikic wrote:

Can you please provide more information on what the regression was / how this 
fixes it?

https://github.com/llvm/llvm-project/pull/84230
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revert "[clang] Avoid memcopy for small structure with padding under … (PR #84230)

2024-03-06 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-codegen

Author: Antonio Frighetto (antoniofrighetto)


Changes

…-ftrivial-auto-var-init (#71677)"

This reverts commit afe8b93ffdfef5d8879e1894b9d7dda40dee2b8d.

Fixes regression: https://github.com/llvm/llvm-project/issues/84178.

---
Full diff: https://github.com/llvm/llvm-project/pull/84230.diff


6 Files Affected:

- (modified) clang/lib/CodeGen/CGDecl.cpp (+20-15) 
- (modified) clang/test/CodeGen/aapcs-align.cpp (+2-2) 
- (modified) clang/test/CodeGen/aapcs64-align.cpp (+4-4) 
- (modified) clang/test/CodeGenCXX/auto-var-init.cpp (+24-23) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-printf.cl (+1-8) 
- (modified) clang/test/OpenMP/bug54082.c (+1-3) 


``diff
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244..46cfd3f10a3fb7 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1244,24 +1244,29 @@ static void emitStoresForConstant(CodeGenModule , 
const VarDecl ,
   // If the initializer is small, use a handful of stores.
   if (shouldSplitConstantStore(CGM, ConstantSize)) {
 if (auto *STy = dyn_cast(Ty)) {
-  const llvm::StructLayout *Layout =
-  CGM.getDataLayout().getStructLayout(STy);
-  for (unsigned i = 0; i != constant->getNumOperands(); i++) {
-CharUnits CurOff = 
CharUnits::fromQuantity(Layout->getElementOffset(i));
-Address EltPtr = Builder.CreateConstInBoundsByteGEP(
-Loc.withElementType(CGM.Int8Ty), CurOff);
-emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
-  constant->getAggregateElement(i), IsAutoInit);
+  // FIXME: handle the case when STy != Loc.getElementType().
+  if (STy == Loc.getElementType()) {
+for (unsigned i = 0; i != constant->getNumOperands(); i++) {
+  Address EltPtr = Builder.CreateStructGEP(Loc, i);
+  emitStoresForConstant(
+  CGM, D, EltPtr, isVolatile, Builder,
+  cast(Builder.CreateExtractValue(constant, i)),
+  IsAutoInit);
+}
+return;
   }
-  return;
 } else if (auto *ATy = dyn_cast(Ty)) {
-  for (unsigned i = 0; i != ATy->getNumElements(); i++) {
-Address EltPtr = Builder.CreateConstGEP(
-Loc.withElementType(ATy->getElementType()), i);
-emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
-  constant->getAggregateElement(i), IsAutoInit);
+  // FIXME: handle the case when ATy != Loc.getElementType().
+  if (ATy == Loc.getElementType()) {
+for (unsigned i = 0; i != ATy->getNumElements(); i++) {
+  Address EltPtr = Builder.CreateConstArrayGEP(Loc, i);
+  emitStoresForConstant(
+  CGM, D, EltPtr, isVolatile, Builder,
+  cast(Builder.CreateExtractValue(constant, i)),
+  IsAutoInit);
+}
+return;
   }
-  return;
 }
   }
 
diff --git a/clang/test/CodeGen/aapcs-align.cpp 
b/clang/test/CodeGen/aapcs-align.cpp
index 2886a32974b066..4f393d9e6b7f32 100644
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -134,8 +134,8 @@ void g6() {
   f6m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g6
-// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
-// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 
noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
+// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 
undef])
+// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 
noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 undef])
 // CHECK: declare void @f6(i32 noundef, [4 x i32])
 // CHECK: declare void @f6m(i32 noundef, i32 noundef, i32 noundef, i32 
noundef, i32 noundef, [4 x i32])
 }
diff --git a/clang/test/CodeGen/aapcs64-align.cpp 
b/clang/test/CodeGen/aapcs64-align.cpp
index 759413cbc4b56f..de231f2123b975 100644
--- a/clang/test/CodeGen/aapcs64-align.cpp
+++ b/clang/test/CodeGen/aapcs64-align.cpp
@@ -75,8 +75,8 @@ void g4() {
   f4m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g4()
-// CHECK: call void @f4(i32 noundef 1, [2 x i64] %{{.*}})
-// CHECK: void @f4m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 
4, i32 noundef 5, [2 x i64] %{{.*}})
+// CHECK: call void @f4(i32 noundef 1, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: void @f4m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 
4, i32 noundef 5, [2 x i64] [i64 30064771078, i64 0])
 // CHECK: declare void @f4(i32 noundef, [2 x i64])
 // CHECK: declare void @f4m(i32 noundef, i32 noundef, i32 noundef, i32 
noundef, i32 noundef, [2 x i64])
 
@@ -95,8 +95,8 @@ void f5m(int, int, int, int, int, P16);
 f5m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g5()
-// CHECK: call void @f5(i32 noundef 1, [2 x i64] %{{.*}})
-// CHECK: void @f5m(i32 noundef 1, i32 noundef 2, i32 

[clang] Revert "[clang] Avoid memcopy for small structure with padding under … (PR #84230)

2024-03-06 Thread Antonio Frighetto via cfe-commits

https://github.com/antoniofrighetto created 
https://github.com/llvm/llvm-project/pull/84230

…-ftrivial-auto-var-init (#71677)"

This reverts commit afe8b93ffdfef5d8879e1894b9d7dda40dee2b8d.

Fixes regression: https://github.com/llvm/llvm-project/issues/84178.

>From bb22eccc90d0e8cb02be5d4c47a08a17baf4d242 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto 
Date: Wed, 6 Mar 2024 21:29:13 +0100
Subject: [PATCH] Revert "[clang] Avoid memcopy for small structure with
 padding under -ftrivial-auto-var-init (#71677)"

This reverts commit afe8b93ffdfef5d8879e1894b9d7dda40dee2b8d.

Fixes: https://github.com/llvm/llvm-project/issues/84178.
---
 clang/lib/CodeGen/CGDecl.cpp  | 35 +
 clang/test/CodeGen/aapcs-align.cpp|  4 +-
 clang/test/CodeGen/aapcs64-align.cpp  |  8 ++--
 clang/test/CodeGenCXX/auto-var-init.cpp   | 47 ---
 clang/test/CodeGenOpenCL/amdgpu-printf.cl |  9 +
 clang/test/OpenMP/bug54082.c  |  4 +-
 6 files changed, 52 insertions(+), 55 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244..46cfd3f10a3fb7 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1244,24 +1244,29 @@ static void emitStoresForConstant(CodeGenModule , 
const VarDecl ,
   // If the initializer is small, use a handful of stores.
   if (shouldSplitConstantStore(CGM, ConstantSize)) {
 if (auto *STy = dyn_cast(Ty)) {
-  const llvm::StructLayout *Layout =
-  CGM.getDataLayout().getStructLayout(STy);
-  for (unsigned i = 0; i != constant->getNumOperands(); i++) {
-CharUnits CurOff = 
CharUnits::fromQuantity(Layout->getElementOffset(i));
-Address EltPtr = Builder.CreateConstInBoundsByteGEP(
-Loc.withElementType(CGM.Int8Ty), CurOff);
-emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
-  constant->getAggregateElement(i), IsAutoInit);
+  // FIXME: handle the case when STy != Loc.getElementType().
+  if (STy == Loc.getElementType()) {
+for (unsigned i = 0; i != constant->getNumOperands(); i++) {
+  Address EltPtr = Builder.CreateStructGEP(Loc, i);
+  emitStoresForConstant(
+  CGM, D, EltPtr, isVolatile, Builder,
+  cast(Builder.CreateExtractValue(constant, i)),
+  IsAutoInit);
+}
+return;
   }
-  return;
 } else if (auto *ATy = dyn_cast(Ty)) {
-  for (unsigned i = 0; i != ATy->getNumElements(); i++) {
-Address EltPtr = Builder.CreateConstGEP(
-Loc.withElementType(ATy->getElementType()), i);
-emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
-  constant->getAggregateElement(i), IsAutoInit);
+  // FIXME: handle the case when ATy != Loc.getElementType().
+  if (ATy == Loc.getElementType()) {
+for (unsigned i = 0; i != ATy->getNumElements(); i++) {
+  Address EltPtr = Builder.CreateConstArrayGEP(Loc, i);
+  emitStoresForConstant(
+  CGM, D, EltPtr, isVolatile, Builder,
+  cast(Builder.CreateExtractValue(constant, i)),
+  IsAutoInit);
+}
+return;
   }
-  return;
 }
   }
 
diff --git a/clang/test/CodeGen/aapcs-align.cpp 
b/clang/test/CodeGen/aapcs-align.cpp
index 2886a32974b066..4f393d9e6b7f32 100644
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -134,8 +134,8 @@ void g6() {
   f6m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g6
-// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
-// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 
noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
+// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 
undef])
+// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 
noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 undef])
 // CHECK: declare void @f6(i32 noundef, [4 x i32])
 // CHECK: declare void @f6m(i32 noundef, i32 noundef, i32 noundef, i32 
noundef, i32 noundef, [4 x i32])
 }
diff --git a/clang/test/CodeGen/aapcs64-align.cpp 
b/clang/test/CodeGen/aapcs64-align.cpp
index 759413cbc4b56f..de231f2123b975 100644
--- a/clang/test/CodeGen/aapcs64-align.cpp
+++ b/clang/test/CodeGen/aapcs64-align.cpp
@@ -75,8 +75,8 @@ void g4() {
   f4m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g4()
-// CHECK: call void @f4(i32 noundef 1, [2 x i64] %{{.*}})
-// CHECK: void @f4m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 
4, i32 noundef 5, [2 x i64] %{{.*}})
+// CHECK: call void @f4(i32 noundef 1, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: void @f4m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 
4, i32 noundef 5, [2 x i64] [i64 30064771078, i64 0])
 // CHECK: declare void @f4(i32 noundef, [2 x i64])
 // CHECK: declare void @f4m(i32