github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: C/C++ code formatter, clang-format found issues in your code.
:warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp clang/lib/Sema/SemaAMDGPU.cpp clang/lib/Sema/SemaChecking.cpp
``````````
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 8b7e419a1..fca6fbbf5 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -165,20 +165,22 @@ Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
// Assumptions:
// - Return type equals source type (frontend/Sema should enforce).
// - Semantics are on the object representation (raw bits), including padding.
-// - For payloads > 32 bits, split into 32-bit words, permute each with the same index,
+// - For payloads > 32 bits, split into 32-bit words, permute each with the same
+//   index,
// and reassemble.
-// - First-class scalar/vector values whose total size is a multiple of 32 bits use a
-//   register-only path by bitcasting to <N x i32>. Aggregates or odd sizes use a
-//   memory-backed path.
-// - <= 32-bit scalars (char/short/int/float/half) follow a fast i32 path for performance.
-llvm::Value *
-emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
- const clang::CallExpr *Call) {
- auto &B = CGF.Builder;
+// - First-class scalar/vector values whose total size is a multiple of 32 bits
+// use a
+// register-only path by bitcasting to <N x i32>. Aggregates or odd sizes use
+// a memory-backed path.
+// - <= 32-bit scalars (char/short/int/float/half) follow a fast i32 path for
+// performance.
+llvm::Value *emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
+ const clang::CallExpr *Call) {
+ auto &B = CGF.Builder;
auto &CGM = CGF.CGM;
const llvm::DataLayout &DL = CGM.getDataLayout();
- llvm::Type *I8 = B.getInt8Ty();
+ llvm::Type *I8 = B.getInt8Ty();
llvm::Type *I32 = B.getInt32Ty();
llvm::Type *I64 = B.getInt64Ty();
@@ -194,24 +196,29 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
// - Integers: zext/trunc to i32.
// - Pointers: ptrtoint to intptr, then zext/trunc to i32.
// - Other first-class: bitcast to intN then zext/trunc to i32.
-  auto toI32Index = [&](llvm::Value *IdxVal, clang::QualType IdxQT) -> llvm::Value * {
+ auto toI32Index = [&](llvm::Value *IdxVal,
+ clang::QualType IdxQT) -> llvm::Value * {
(void)IdxQT; // signedness not relevant for index
llvm::Type *Ty = IdxVal->getType();
if (Ty->isIntegerTy())
return B.CreateZExtOrTrunc(IdxVal, I32);
if (Ty->isPointerTy()) {
unsigned PtrBits = DL.getPointerSizeInBits(Ty->getPointerAddressSpace());
-      return B.CreateZExtOrTrunc(B.CreatePtrToInt(IdxVal, B.getIntNTy(PtrBits)), I32);
+      return B.CreateZExtOrTrunc(B.CreatePtrToInt(IdxVal, B.getIntNTy(PtrBits)),
+                                 I32);
}
unsigned Bits = getBitWidth(Ty);
return B.CreateZExtOrTrunc(B.CreateBitCast(IdxVal, B.getIntNTy(Bits)),
I32);
};
   // Coerces an arbitrary <= 32-bit scalar payload to i32.
-  // - Integers: extend to i32 honoring signedness if narrower; zext/trunc otherwise.
+ // - Integers: extend to i32 honoring signedness if narrower; zext/trunc
+ // otherwise.
// - Pointers: ptrtoint to intptr, then zext/trunc to i32.
-  // - Other first-class scalars (e.g., float, half): bitcast to intN then zext/trunc to i32.
-  auto coercePayloadToI32 = [&](llvm::Value *Val, clang::QualType SrcQT) -> llvm::Value * {
+ // - Other first-class scalars (e.g., float, half): bitcast to intN then
+ // zext/trunc to i32.
+ auto coercePayloadToI32 = [&](llvm::Value *Val,
+ clang::QualType SrcQT) -> llvm::Value * {
llvm::Type *Ty = Val->getType();
if (Ty->isIntegerTy()) {
unsigned BW = Ty->getIntegerBitWidth();
@@ -224,20 +231,22 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
}
if (Ty->isPointerTy()) {
unsigned PtrBits = DL.getPointerSizeInBits(Ty->getPointerAddressSpace());
-      return B.CreateZExtOrTrunc(B.CreatePtrToInt(Val, B.getIntNTy(PtrBits)), I32);
+ return B.CreateZExtOrTrunc(B.CreatePtrToInt(Val, B.getIntNTy(PtrBits)),
+ I32);
}
unsigned Bits = getBitWidth(Ty);
return B.CreateZExtOrTrunc(B.CreateBitCast(Val, B.getIntNTy(Bits)), I32);
};
   // Converts an i32 result back to an arbitrary <= 32-bit destination type.
-  // - Integer <= 32 bits: zext/sext/trunc appropriately using source signedness for narrow types.
+  // - Integer <= 32 bits: zext/sext/trunc appropriately using source signedness
+  //   for narrow types.
   // - Pointer <= 32 bits: zext/trunc to pointer width and inttoptr.
// - Other first-class types:
// - If 32 bits: bitcast i32 to destination type.
-  //     - If narrower than 32 bits (e.g., half = 16): first trunc i32 to iN, then bitcast iN to DstTy.
- auto coerceFromI32ToType = [&](llvm::Value *I32Val,
- llvm::Type *DstTy,
+ // - If narrower than 32 bits (e.g., half = 16): first trunc i32 to iN,
then
+ // bitcast iN to DstTy.
+ auto coerceFromI32ToType = [&](llvm::Value *I32Val, llvm::Type *DstTy,
clang::QualType SrcQT) -> llvm::Value * {
if (DstTy->isIntegerTy()) {
unsigned DW = DstTy->getIntegerBitWidth();
@@ -265,12 +274,14 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
if (BW < 32)
Tr = B.CreateTrunc(I32Val, IntBW);
else if (BW > 32)
-        Tr = B.CreateZExt(I32Val, IntBW); // should not happen in the fast 32-bit path
+ Tr = B.CreateZExt(I32Val,
+ IntBW); // should not happen in the fast 32-bit path
return B.CreateBitCast(Tr, DstTy);
};
// Returns {wordCount, tailBytes} for a payload size in bits.
-  auto wordCountAndTail = [&](unsigned totalBits) -> std::pair<unsigned, unsigned> {
+ auto wordCountAndTail =
+ [&](unsigned totalBits) -> std::pair<unsigned, unsigned> {
unsigned bytes = totalBits / 8;
return {bytes / 4, bytes % 4};
};
@@ -297,14 +308,16 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
if (totalBits <= 32) {
llvm::Value *SrcI32 = coercePayloadToI32(SrcVal, SrcQT);
llvm::SmallVector<llvm::Value *, 2> ArgsA{IndexI32, SrcI32};
-      llvm::Value *ResI32 = B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsA);
- llvm::Value *Res = coerceFromI32ToType(ResI32, RetTy, SrcQT);
+ llvm::Value *ResI32 =
+ B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsA);
+ llvm::Value *Res = coerceFromI32ToType(ResI32, RetTy, SrcQT);
return Res;
}
}
-  // Fast path B: First-class scalar/vector whose total size is a multiple of 32 bits.
-  // Bitcast to <N x i32>, permute each lane, bitcast back. Register-only; no memory.
+  // Fast path B: First-class scalar/vector whose total size is a multiple of 32
+  // bits. Bitcast to <N x i32>, permute each lane, bitcast back. Register-only;
+  // no memory.
if (!IsAggregate) {
llvm::Value *SrcVal = CGF.EmitScalarExpr(Call->getArg(1));
unsigned totalBits = getBitWidth(SrcVal->getType());
@@ -315,7 +328,8 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
// Handle pointers by going through intptr first
llvm::Value *AsIntN = SrcVal;
if (SrcVal->getType()->isPointerTy()) {
-      unsigned PW = DL.getPointerSizeInBits(SrcVal->getType()->getPointerAddressSpace());
+ unsigned PW = DL.getPointerSizeInBits(
+ SrcVal->getType()->getPointerAddressSpace());
AsIntN = B.CreatePtrToInt(SrcVal, B.getIntNTy(PW));
}
@@ -324,7 +338,8 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
llvm::Value *ResVec = llvm::UndefValue::get(I32VecTy);
for (unsigned i = 0; i < words; ++i) {
llvm::Value *Lane = B.CreateExtractElement(AsI32Vec, c32(i));
-      llvm::Value *Perm = B.CreateCall(Bperm->getFunctionType(), Bperm, {IndexI32, Lane});
+ llvm::Value *Perm =
+ B.CreateCall(Bperm->getFunctionType(), Bperm, {IndexI32, Lane});
ResVec = B.CreateInsertElement(ResVec, Perm, c32(i));
}
@@ -339,16 +354,20 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
// General aggregate/odd-size path:
// - Works for structs/arrays/complex and any total size.
-  //   - Materialize source to a temp, process 4-byte words (unaligned loads/stores),
-  //     handle tail bytes by packing/unpacking into an i32, and return loaded Value*.
+ // - Materialize source to a temp, process 4-byte words (unaligned
+ // loads/stores),
+ // handle tail bytes by packing/unpacking into an i32, and return loaded
+ // Value*.
auto emitAggregatePath = [&]() -> llvm::Value * {
clang::QualType SrcQTLocal = Call->getArg(1)->getType();
llvm::Type *SrcTy = CGF.ConvertType(SrcQTLocal);
-    clang::CodeGen::Address SrcAddr = CGF.CreateMemTemp(SrcQTLocal, "dsbperm.src");
-    clang::CodeGen::Address DstAddr = CGF.CreateMemTemp(RetQT, "dsbperm.dst");
+ clang::CodeGen::Address SrcAddr =
+ CGF.CreateMemTemp(SrcQTLocal, "dsbperm.src");
+ clang::CodeGen::Address DstAddr = CGF.CreateMemTemp(RetQT, "dsbperm.dst");
-    CGF.EmitAnyExprToMem(Call->getArg(1), SrcAddr, SrcQTLocal.getQualifiers(), /*IsInit*/true);
+ CGF.EmitAnyExprToMem(Call->getArg(1), SrcAddr, SrcQTLocal.getQualifiers(),
+ /*IsInit*/ true);
// i8 views of the buffers (as Address).
clang::CodeGen::Address SrcI8Addr = SrcAddr.withElementType(I8);
@@ -357,8 +376,8 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
auto CU = [&](uint64_t N) { return clang::CharUnits::fromQuantity(N); };
uint64_t sizeBytes = DL.getTypeAllocSize(SrcTy);
- uint64_t words = sizeBytes / 4;
- uint64_t tail = sizeBytes % 4;
+ uint64_t words = sizeBytes / 4;
+ uint64_t tail = sizeBytes % 4;
for (uint64_t i = 0; i < words; ++i) {
uint64_t off = i * 4;
@@ -377,7 +396,8 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
auto *Ld = B.CreateLoad(SrcWordI32Addr);
llvm::SmallVector<llvm::Value *, 2> ArgsWord{IndexI32, Ld};
-      llvm::Value *Perm = B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsWord);
+ llvm::Value *Perm =
+ B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsWord);
(void)B.CreateStore(Perm, DstWordI32Addr);
}
@@ -398,7 +418,8 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
}
llvm::SmallVector<llvm::Value *, 2> ArgsTail{IndexI32, Pack};
-    llvm::Value *Perm = B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsTail);
+ llvm::Value *Perm =
+ B.CreateCall(Bperm->getFunctionType(), Bperm, ArgsTail);
for (uint64_t b = 0; b < tail; ++b) {
llvm::Value *Byte = B.CreateTrunc(B.CreateLShr(Perm, c32(8 * b)), I8);
@@ -408,10 +429,12 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
}
}
-  // Load the final result from the destination temporary and return it as a Value*.
+ // Load the final result from the destination temporary and return it as a
+ // Value*.
llvm::Value *Res = B.CreateLoad(DstAddr);
-  // For aggregates (struct/array/union), ensure determinism by freezing the value.
-  // freeze turns any undef/poison in padding into a fixed but arbitrary value.
+ // For aggregates (struct/array/union), ensure determinism by freezing the
+ // value. freeze turns any undef/poison in padding into a fixed but
+ // arbitrary value.
if (Res->getType()->isAggregateType())
Res = B.CreateFreeze(Res);
return Res;
@@ -420,8 +443,6 @@ emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
return emitAggregatePath();
}
-
-
} // namespace
// Generates the IR for __builtin_read_exec_*.
``````````
</details>
https://github.com/llvm/llvm-project/pull/153501
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits