llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-loongarch Author: None (Ami-zhang) <details> <summary>Changes</summary> This PR contains 3 commits: 1. Updated the FP16 implementation to pass arguments via FPR instead of the original GPR. 2. Added support for the _Float16 type and fixed 2 related issues. 3. Added support for the __bf16 type. --- Patch is 219.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141564.diff 15 Files Affected: - (modified) clang/docs/LanguageExtensions.rst (+2) - (modified) clang/lib/Basic/Targets/LoongArch.h (+8) - (modified) clang/lib/CodeGen/Targets/LoongArch.cpp (+3-4) - (added) clang/test/CodeGen/LoongArch/__fp16-convert.c (+30) - (modified) clang/test/CodeGen/LoongArch/abi-lp64d.c (+71) - (added) clang/test/CodeGen/LoongArch/bfloat-abi.c (+611) - (added) clang/test/CodeGen/LoongArch/bfloat-mangle.cpp (+19) - (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+181-3) - (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+26) - (added) llvm/test/CodeGen/LoongArch/bf16-promote.ll (+172) - (added) llvm/test/CodeGen/LoongArch/bf16.ll (+1048) - (added) llvm/test/CodeGen/LoongArch/calling-conv-half.ll (+1626) - (modified) llvm/test/CodeGen/LoongArch/fp16-promote.ll (+131-71) - (added) llvm/test/CodeGen/LoongArch/issue97975.ll (+438) - (added) llvm/test/CodeGen/LoongArch/issue97981.ll (+127) ``````````diff diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index a40dd4d1a1673..4fa91b95c45e0 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -1001,6 +1001,7 @@ to ``float``; see below for more information on this emulation. * X86 (if SSE2 is available; natively if AVX512-FP16 is also available) * RISC-V (natively if Zfh or Zhinx is available) * SystemZ (emulated) + * LoongArch * ``__bf16`` is supported on the following targets (currently never natively): @@ -1008,6 +1009,7 @@ to ``float``; see below for more information on this emulation. * 64-bit ARM (AArch64) * RISC-V * X86 (when SSE2 is available) + * LoongArch (For X86, SSE2 is available on 64-bit and all recent 32-bit processors.) diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index 4c7b53abfef9b..7e9affc98ac0f 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -49,10 +49,14 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { HasFeatureLD_SEQ_SA = false; HasFeatureDiv32 = false; HasFeatureSCQ = false; + BFloat16Width = 16; + BFloat16Align = 16; + BFloat16Format = &llvm::APFloat::BFloat(); LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); MCountName = "_mcount"; + HasFloat16 = true; SuitableAlign = 128; WCharType = SignedInt; WIntType = UnsignedInt; @@ -98,6 +102,10 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { bool hasBitIntType() const override { return true; } + bool hasBFloat16Type() const override { return true; } + + bool useFP16ConversionIntrinsics() const override { return false; } + bool handleTargetFeatures(std::vector<std::string> &Features, DiagnosticsEngine &Diags) override; diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp index 0f689371a60db..7640f3779816a 100644 --- a/clang/lib/CodeGen/Targets/LoongArch.cpp +++ b/clang/lib/CodeGen/Targets/LoongArch.cpp @@ -110,10 +110,9 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( uint64_t Size = getContext().getTypeSize(Ty); if (IsInt && Size > GRLen) return false; - // Can't be eligible if larger than the FP registers. Half precision isn't - // currently supported on LoongArch and the ABI hasn't been confirmed, so - // default to the integer ABI in that case. - if (IsFloat && (Size > FRLen || Size < 32)) + // Can't be eligible if larger than the FP registers. Handling of half + // precision values has been specified in the ABI, so don't block those. + if (IsFloat && Size > FRLen) return false; // Can't be eligible if an integer type was already found (int+int pairs // are not eligible). diff --git a/clang/test/CodeGen/LoongArch/__fp16-convert.c b/clang/test/CodeGen/LoongArch/__fp16-convert.c new file mode 100644 index 0000000000000..84ef5de960b47 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/__fp16-convert.c @@ -0,0 +1,30 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple loongarch64 -emit-llvm %s -o - \ +// RUN: | FileCheck %s + +__fp16 y; +short z; +// CHECK-LABEL: define dso_local void @bar1( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr @y, align 2 +// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to float +// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[CONV]] to i16 +// CHECK-NEXT: store i16 [[CONV1]], ptr @z, align 2 +// CHECK-NEXT: ret void +// +void bar1(){ + z = y; +} +// CHECK-LABEL: define dso_local void @bar2( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @z, align 2 +// CHECK-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to float +// CHECK-NEXT: [[CONV1:%.*]] = fptrunc float [[CONV]] to half +// CHECK-NEXT: store half [[CONV1]], ptr @y, align 2 +// CHECK-NEXT: ret void +// +void bar2(){ + y = z; +} diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c index fc7f1eada586b..9f64cfd662e5f 100644 --- a/clang/test/CodeGen/LoongArch/abi-lp64d.c +++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c @@ -48,6 +48,9 @@ unsigned long check_ulong() { return 0; } // CHECK-LABEL: define{{.*}} i64 @check_ulonglong() unsigned long long check_ulonglong() { return 0; } +// CHECK-LABEL: define{{.*}} half @check_float16() +_Float16 check_float16() { return 0; } + // CHECK-LABEL: define{{.*}} float @check_float() float check_float() { return 0; } @@ -127,6 +130,14 @@ struct i16x4_s f_i16x4_s(struct i16x4_s x) { /// available, the value is passed in a GAR; if no GAR is available, the value /// is passed on the stack. +struct f16x1_s { + __fp16 a; +}; + +struct float16x1_s { + _Float16 a; +}; + struct f32x1_s { float a; }; @@ -135,6 +146,16 @@ struct f64x1_s { double a; }; +// CHECK-LABEL: define{{.*}} half @f_f16x1_s(half %0) +struct f16x1_s f_f16x1_s(struct f16x1_s x) { + return x; +} + +// CHECK-LABEL: define{{.*}} half @f_float16x1_s(half %0) +struct float16x1_s f_float16x1_s(struct float16x1_s x) { + return x; +} + // CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0) struct f32x1_s f_f32x1_s(struct f32x1_s x) { return x; @@ -151,10 +172,20 @@ struct f64x1_s f_f64x1_s(struct f64x1_s x) { /// number of available FAR is less than 2, it’s passed in a GAR, and passed on /// the stack if no GAR is available. +struct f16x2_s { + __fp16 a; + _Float16 b; +}; + struct f32x2_s { float a, b; }; +// CHECK-LABEL: define{{.*}} { half, half } @f_f16x2_s(half %0, half %1) +struct f16x2_s f_f16x2_s(struct f16x2_s x) { + return x; +} + // CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1) struct f32x2_s f_f32x2_s(struct f32x2_s x) { return x; @@ -165,11 +196,21 @@ struct f32x2_s f_f32x2_s(struct f32x2_s x) { /// i. Multiple fixed-point members. If there are available GAR, the structure /// is passed in a GAR, and passed on the stack if no GAR is available. +struct f16x1_i16x2_s { + _Float16 a; + int16_t b, c; +}; + struct f32x1_i16x2_s { float a; int16_t b, c; }; +// CHECK-LABEL: define{{.*}} i64 @f_f16x1_i16x2_s(i64 %x.coerce) +struct f16x1_i16x2_s f_f16x1_i16x2_s(struct f16x1_i16x2_s x) { + return x; +} + // CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce) struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) { return x; @@ -181,11 +222,21 @@ struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) { /// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s /// passed on the stack. +struct f16x1_i32x1_s { + _Float16 a; + int32_t b; +}; + struct f32x1_i32x1_s { float a; int32_t b; }; +// CHECK-LABEL: define{{.*}} { half, i32 } @f_f16x1_i32x1_s(half %0, i32 %1) +struct f16x1_i32x1_s f_f16x1_i32x1_s(struct f16x1_i32x1_s x) { + return x; +} + // CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1) struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) { return x; @@ -253,6 +304,16 @@ struct f32x4_s f_f32x4_s(struct f32x4_s x) { return x; } +struct f16x5_s { + _Float16 a, b, c, d; + __fp16 e; +}; + +// CHECK-LABEL: define{{.*}} [2 x i64] @f_f16x5_s([2 x i64] %x.coerce) +struct f16x5_s f_f16x5_s(struct f16x5_s x) { + return x; +} + /// ii. The structure with two double members is passed in a pair of available /// FARs. If no a pair of available FARs, it’s passed in GARs. A structure with /// one double member and one float member is same. @@ -312,6 +373,16 @@ struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) { return x; } +struct f16x4_i32x2_s { + _Float16 a, b, c, d; + int32_t e, f; +}; + +// CHECK-LABEL: define{{.*}} [2 x i64] @f_f16x4_i32x2_s([2 x i64] %x.coerce) +struct f16x4_i32x2_s f_f16x4_i32x2_s(struct f16x4_i32x2_s x) { + return x; +} + /// 3. WOA > 2 × GRLEN /// a. It’s passed by reference and are replaced in the argument list with the /// address. If there is an available GAR, the reference is passed in the GAR, diff --git a/clang/test/CodeGen/LoongArch/bfloat-abi.c b/clang/test/CodeGen/LoongArch/bfloat-abi.c new file mode 100644 index 0000000000000..9f0e25c17cc74 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/bfloat-abi.c @@ -0,0 +1,611 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple loongarch64 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK-LA64 +// RUN: %clang_cc1 -triple loongarch32 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK-LA32 + +struct bfloat1 { + __bf16 a; +}; + +// CHECK-LA64-LABEL: define dso_local bfloat @h1 +// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-LA64-NEXT: entry: +// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2 +// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA64-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { bfloat }, ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[TMP1]], align 2 +// CHECK-LA64-NEXT: ret bfloat [[TMP2]] +// +// CHECK-LA32-LABEL: define dso_local bfloat @h1 +// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-LA32-NEXT: entry: +// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2 +// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA32-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { bfloat }, ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[TMP1]], align 2 +// CHECK-LA32-NEXT: ret bfloat [[TMP2]] +// +struct bfloat1 h1(__bf16 a) { + struct bfloat1 x; + x.a = a; + return x; +} + +struct bfloat2 { + __bf16 a; + __bf16 b; +}; + +// CHECK-LA64-LABEL: define dso_local { bfloat, bfloat } @h2 +// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-LA64-NEXT: entry: +// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2 +// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA64-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[TMP2]], align 2 +// CHECK-LA64-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[TMP4]], align 2 +// CHECK-LA64-NEXT: [[TMP6:%.*]] = insertvalue { bfloat, bfloat } poison, bfloat [[TMP3]], 0 +// CHECK-LA64-NEXT: [[TMP7:%.*]] = insertvalue { bfloat, bfloat } [[TMP6]], bfloat [[TMP5]], 1 +// CHECK-LA64-NEXT: ret { bfloat, bfloat } [[TMP7]] +// +// CHECK-LA32-LABEL: define dso_local { bfloat, bfloat } @h2 +// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-LA32-NEXT: entry: +// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2 +// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA32-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[TMP2]], align 2 +// CHECK-LA32-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA32-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[TMP4]], align 2 +// CHECK-LA32-NEXT: [[TMP6:%.*]] = insertvalue { bfloat, bfloat } poison, bfloat [[TMP3]], 0 +// CHECK-LA32-NEXT: [[TMP7:%.*]] = insertvalue { bfloat, bfloat } [[TMP6]], bfloat [[TMP5]], 1 +// CHECK-LA32-NEXT: ret { bfloat, bfloat } [[TMP7]] +// +struct bfloat2 h2(__bf16 a, __bf16 b) { + struct bfloat2 x; + x.a = a; + x.b = b; + return x; +} + +struct bfloat3 { + __bf16 a; + __bf16 b; + __bf16 c; +}; + +// CHECK-LA64-LABEL: define dso_local i64 @h3 +// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-LA64-NEXT: entry: +// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2 +// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[RETVAL_COERCE:%.*]] = alloca i64, align 8 +// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-LA64-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-LA64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-LA64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 6, i1 false) +// CHECK-LA64-NEXT: [[TMP3:%.*]] = load i64, ptr [[RETVAL_COERCE]], align 8 +// CHECK-LA64-NEXT: ret i64 [[TMP3]] +// +// CHECK-LA32-LABEL: define dso_local [2 x i32] @h3 +// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-LA32-NEXT: entry: +// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2 +// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i32], align 4 +// CHECK-LA32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-LA32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-LA32-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-LA32-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-LA32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i32 6, i1 false) +// CHECK-LA32-NEXT: [[TMP3:%.*]] = load [2 x i32], ptr [[RETVAL_COERCE]], align 4 +// CHECK-LA32-NEXT: ret [2 x i32] [[TMP3]] +// +struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) { + struct bfloat3 x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct bfloat4 { + __bf16 a; + __bf16 b; + __bf16 c; + __bf16 d; +}; + +// CHECK-LA64-LABEL: define dso_local i64 @h4 +// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-LA64-NEXT: entry: +// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2 +// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-LA64-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-LA64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-LA64-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-LA64-NEXT: [[D4:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-LA64-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2 +// CHECK-LA64-NEXT: [[TMP4:%.*]] = load i64, ptr [[RETVAL]], align 2 +// CHECK-LA64-NEXT: ret i64 [[TMP4]] +// +// CHECK-LA32-LABEL: define dso_local [2 x i32] @h4 +// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-LA32-NEXT: entry: +// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2 +// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[D_ADDR:%.*]... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/141564 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits