Author: Ties Stuij Date: 2020-06-05T14:11:51+01:00 New Revision: a6fcf5ca033a83b815f760664e0cff91c2c13dcd
URL: https://github.com/llvm/llvm-project/commit/a6fcf5ca033a83b815f760664e0cff91c2c13dcd DIFF: https://github.com/llvm/llvm-project/commit/a6fcf5ca033a83b815f760664e0cff91c2c13dcd.diff LOG: [clang][BFloat] add NEON emitter for bfloat Summary: This patch adds the bfloat16_t struct typedefs (e.g. bfloat16x8x2_t) to arm_neon.h. This patch is part of a series implementing the Bfloat16 extension of the Armv8.6-a architecture, as detailed here: https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a The bfloat type and its properties are specified in the Arm Architecture Reference Manual: https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile The following people contributed to this patch: - Luke Cheeseman - Simon Tatham - Ties Stuij Reviewers: t.p.northover, fpetrogalli, sdesmalen, az, LukeGeeson Reviewed By: fpetrogalli Subscribers: SjoerdMeijer, LukeGeeson, pbarrio, mgorny, kristof.beyls, ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D79708 Added: clang/include/clang/Basic/arm_bf16.td Modified: clang/include/clang/Basic/arm_neon_incl.td clang/lib/Basic/Targets/AArch64.cpp clang/lib/Basic/Targets/ARM.cpp clang/lib/Headers/CMakeLists.txt clang/test/CodeGen/arm-bf16-params-returns.c clang/test/Preprocessor/aarch64-target-features.c clang/test/Preprocessor/arm-target-features.c clang/utils/TableGen/NeonEmitter.cpp clang/utils/TableGen/TableGen.cpp clang/utils/TableGen/TableGenBackends.h Removed: ################################################################################ diff --git a/clang/include/clang/Basic/arm_bf16.td b/clang/include/clang/Basic/arm_bf16.td new file mode 100644 index 000000000000..d837a7666d40 --- /dev/null +++ b/clang/include/clang/Basic/arm_bf16.td @@ -0,0 +1,14 @@ +//===--- arm_bf16.td - ARM BF16 compiler interface 
------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the TableGen definitions from which the ARM BF16 header +// file will be generated. +// +//===----------------------------------------------------------------------===// + +include "arm_neon_incl.td" diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td index 7593fdcfb486..a1031fe4ad4f 100644 --- a/clang/include/clang/Basic/arm_neon_incl.td +++ b/clang/include/clang/Basic/arm_neon_incl.td @@ -215,6 +215,7 @@ def OP_UNAVAILABLE : Operation { // f: float // h: half-float // d: double +// b: bfloat16 // // Typespec modifiers // ------------------ @@ -236,6 +237,7 @@ def OP_UNAVAILABLE : Operation { // S: change to signed integer category. // U: change to unsigned integer category. // F: change to floating category. +// B: change to BFloat16 // P: change to polynomial category. // p: change polynomial to equivalent integer category. Otherwise nop. 
// diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 080571c6ea4f..b474d1203dee 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -286,6 +286,12 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasMatMul) Builder.defineMacro("__ARM_FEATURE_MATMUL_INT8", "1"); + if (HasBFloat16) { + Builder.defineMacro("__ARM_FEATURE_BF16", "1"); + Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1"); + Builder.defineMacro("__ARM_BF16_FORMAT_ALTERNATIVE", "1"); + } + if ((FPU & NeonMode) && HasFP16FML) Builder.defineMacro("__ARM_FEATURE_FP16FML", "1"); diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 5e605abfc137..21cfe0107bbb 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -838,6 +838,12 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, if (HasMatMul) Builder.defineMacro("__ARM_FEATURE_MATMUL_INT8", "1"); + if (HasBFloat16) { + Builder.defineMacro("__ARM_FEATURE_BF16", "1"); + Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1"); + Builder.defineMacro("__ARM_BF16_FORMAT_ALTERNATIVE", "1"); + } + switch (ArchKind) { default: break; diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index c5215eede3f9..1a1f7b30f106 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -190,6 +190,8 @@ clang_generate_header(-gen-arm-neon arm_neon.td arm_neon.h) clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h) # Generate arm_sve.h clang_generate_header(-gen-arm-sve-header arm_sve.td arm_sve.h) +# Generate arm_bf16.h +clang_generate_header(-gen-arm-bf16 arm_bf16.td arm_bf16.h) # Generate arm_mve.h clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h) # Generate arm_cde.h diff --git a/clang/test/CodeGen/arm-bf16-params-returns.c b/clang/test/CodeGen/arm-bf16-params-returns.c index 
f3b1a1d3fad1..11e236c51530 100644 --- a/clang/test/CodeGen/arm-bf16-params-returns.c +++ b/clang/test/CodeGen/arm-bf16-params-returns.c @@ -3,6 +3,8 @@ // RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-abi aapcs -mfloat-abi softfp -target-feature +bf16 -target-feature +neon -emit-llvm -O2 -o - %s | opt -S -mem2reg -sroa | FileCheck %s --check-prefix=CHECK32-SOFTFP // RUN: %clang_cc1 -triple aarch64-arm-none-eabi -target-abi aapcs -mfloat-abi softfp -target-feature +bf16 -target-feature +neon -emit-llvm -O2 -o - %s | opt -S -mem2reg -sroa | FileCheck %s --check-prefix=CHECK64-SOFTFP +#include <arm_neon.h> + // function return types __bf16 test_ret_bf16(__bf16 v) { return v; @@ -16,3 +18,15 @@ __bf16 test_ret_bf16(__bf16 v) { // CHECK32-SOFTFP: ret i32 %tmp2.0.insert.ext // CHECK64-SOFTFP: define bfloat @test_ret_bf16(bfloat returned %v) {{.*}} { // CHECK64-SOFTFP: ret bfloat %v + +bfloat16x4_t test_ret_bf16x4_t(bfloat16x4_t v) { + return v; +} +// CHECK32-HARD: define arm_aapcs_vfpcc <4 x bfloat> @test_ret_bf16x4_t(<4 x bfloat> returned %v) {{.*}} { +// CHECK32-HARD: ret <4 x bfloat> %v +// CHECK64-HARD: define <4 x bfloat> @test_ret_bf16x4_t(<4 x bfloat> returned %v) {{.*}} { +// CHECK64-HARD: ret <4 x bfloat> %v +// CHECK32-SOFTFP: define <2 x i32> @test_ret_bf16x4_t(<2 x i32> [[V0:.*]]) {{.*}} { +// CHECK32-SOFTFP: ret <2 x i32> %v +// CHECK64-SOFTFP: define <4 x bfloat> @test_ret_bf16x4_t(<4 x bfloat> returned %v) {{.*}} { +// CHECK64-SOFTFP: ret <4 x bfloat> %v diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index 8ce6b8a8a45d..784c67cd643d 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -41,6 +41,12 @@ // CHECK-NOT: __ARM_FEATURE_DOTPROD // CHECK-NOT: __ARM_FEATURE_PAC_DEFAULT // CHECK-NOT: __ARM_FEATURE_BTI_DEFAULT +// CHECK-NOT: __ARM_BF16_FORMAT_ALTERNATIVE 1 +// CHECK-NOT: __ARM_FEATURE_BF16 1 +// CHECK-NOT: 
__ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1 + +// RUN: %clang -target aarch64_be-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-BIGENDIAN +// CHECK-BIGENDIAN: __ARM_BIG_ENDIAN 1 // RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+crypto -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRYPTO %s // RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+crypto -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRYPTO %s @@ -368,3 +374,10 @@ // RUN: %clang -target arm64-none-linux-gnu -march=armv8-a -mbranch-protection=pac-ret+bti -x c -E -dM %s -o - | FileCheck -check-prefix=CHECK-BTI %s // CHECK-BTI-OFF-NOT: __ARM_FEATURE_BTI_DEFAULT // CHECK-BTI: #define __ARM_FEATURE_BTI_DEFAULT 1 + +// ================== Check BFloat16 Extensions. +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.6-a+bf16 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-BFLOAT %s +// CHECK-BFLOAT: __ARM_BF16_FORMAT_ALTERNATIVE 1 +// CHECK-BFLOAT: __ARM_FEATURE_BF16 1 +// CHECK-BFLOAT: __ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1 + diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c index 363dfdd68c40..5eaffa1c372c 100644 --- a/clang/test/Preprocessor/arm-target-features.c +++ b/clang/test/Preprocessor/arm-target-features.c @@ -7,6 +7,9 @@ // CHECK-V8A: #define __ARM_FEATURE_NUMERIC_MAXMIN 1 // CHECK-V8A-NOT: #define __ARM_FP 0x // CHECK-V8A-NOT: #define __ARM_FEATURE_DOTPROD +// CHECK-V8A-NOT: #define __ARM_BF16_FORMAT_ALTERNATIVE +// CHECK-V8A-NOT: #define __ARM_FEATURE_BF16 +// CHECK-V8A-NOT: #define __ARM_FEATURE_BF16_VECTOR_ARITHMETIC // RUN: %clang -target armv8a-none-linux-gnueabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8A-ALLOW-FP-INSTR %s // RUN: %clang -target armv8a-none-linux-gnueabihf -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8A-ALLOW-FP-INSTR %s @@ -848,3 +851,9 @@ // RUN: %clang -target arm-none-none-eabi -march=armv7-m -mfpu=softvfp 
-x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SOFTVFP %s // CHECK-SOFTVFP-NOT: #define __ARM_FP 0x + +// ================== Check BFloat16 Extensions. +// RUN: %clang -target arm-arm-none-eabi -march=armv8.6-a+bf16 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-BFLOAT %s +// CHECK-BFLOAT: #define __ARM_BF16_FORMAT_ALTERNATIVE 1 +// CHECK-BFLOAT: #define __ARM_FEATURE_BF16 1 +// CHECK-BFLOAT: #define __ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1 diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index e93c4c653edf..f94166590bd3 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -99,7 +99,8 @@ enum EltType { Poly128, Float16, Float32, - Float64 + Float64, + BFloat16 }; } // end namespace NeonTypeFlags @@ -147,6 +148,7 @@ class Type { SInt, UInt, Poly, + BFloat16, }; TypeKind Kind; bool Immediate, Constant, Pointer; @@ -199,6 +201,7 @@ class Type { bool isInt() const { return isInteger() && ElementBitwidth == 32; } bool isLong() const { return isInteger() && ElementBitwidth == 64; } bool isVoid() const { return Kind == Void; } + bool isBFloat16() const { return Kind == BFloat16; } unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } unsigned getSizeInBits() const { return Bitwidth; } unsigned getElementSizeInBits() const { return ElementBitwidth; } @@ -583,8 +586,11 @@ class NeonEmitter { // runFP16 - Emit arm_fp16.h.inc void runFP16(raw_ostream &o); - // runHeader - Emit all the __builtin prototypes used in arm_neon.h - // and arm_fp16.h + // runBF16 - Emit arm_bf16.h.inc + void runBF16(raw_ostream &o); + + // runHeader - Emit all the __builtin prototypes used in arm_neon.h, + // arm_fp16.h and arm_bf16.h void runHeader(raw_ostream &o); // runTests - Emit tests for all the Neon intrinsics. 
@@ -609,6 +615,8 @@ std::string Type::str() const { S += "poly"; else if (isFloating()) S += "float"; + else if (isBFloat16()) + S += "bfloat"; else S += "int"; @@ -648,7 +656,10 @@ std::string Type::builtin_str() const { case 128: S += "LLLi"; break; default: llvm_unreachable("Unhandled case!"); } - else + else if (isBFloat16()) { + assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits"); + S += "y"; + } else switch (ElementBitwidth) { case 16: S += "h"; break; case 32: S += "f"; break; @@ -702,6 +713,11 @@ unsigned Type::getNeonEnum() const { Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); } + if (isBFloat16()) { + assert(Addend == 1 && "BFloat16 is only 16 bit"); + Base = (unsigned)NeonTypeFlags::BFloat16; + } + if (Bitwidth == 128) Base |= (unsigned)NeonTypeFlags::QuadFlag; if (isInteger() && !isSigned()) @@ -725,6 +741,9 @@ Type Type::fromTypedefName(StringRef Name) { } else if (Name.startswith("poly")) { T.Kind = Poly; Name = Name.drop_front(4); + } else if (Name.startswith("bfloat")) { + T.Kind = BFloat16; + Name = Name.drop_front(6); } else { assert(Name.startswith("int")); Name = Name.drop_front(3); @@ -823,6 +842,10 @@ void Type::applyTypespec(bool &Quad) { if (isPoly()) NumVectors = 0; break; + case 'b': + Kind = BFloat16; + ElementBitwidth = 16; + break; default: llvm_unreachable("Unhandled type code!"); } @@ -849,6 +872,10 @@ void Type::applyModifiers(StringRef Mods) { case 'U': Kind = UInt; break; + case 'B': + Kind = BFloat16; + ElementBitwidth = 16; + break; case 'F': Kind = Float; break; @@ -930,6 +957,9 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { if (CK == ClassB) return ""; + if (T.isBFloat16()) + return "bf16"; + if (T.isPoly()) typeCode = 'p'; else if (T.isInteger()) @@ -967,7 +997,7 @@ std::string Intrinsic::getBuiltinTypeStr() { Type RetT = getReturnType(); if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && - !RetT.isFloating()) + !RetT.isFloating() && !RetT.isBFloat16()) 
RetT.makeInteger(RetT.getElementSizeInBits(), false); // Since the return value must be one type, return a vector type of the @@ -2162,6 +2192,74 @@ void NeonEmitter::runHeader(raw_ostream &OS) { genIntrinsicRangeCheckCode(OS, Defs); } +static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { + std::string TypedefTypes(types); + std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); + + // Emit vector typedefs. + bool InIfdef = false; + for (auto &TS : TDTypeVec) { + bool IsA64 = false; + Type T(TS, "."); + if (T.isDouble()) + IsA64 = true; + + if (InIfdef && !IsA64) { + OS << "#endif\n"; + InIfdef = false; + } + if (!InIfdef && IsA64) { + OS << "#ifdef __aarch64__\n"; + InIfdef = true; + } + + if (T.isPoly()) + OS << "typedef __attribute__((neon_polyvector_type("; + else + OS << "typedef __attribute__((neon_vector_type("; + + Type T2 = T; + T2.makeScalar(); + OS << T.getNumElements() << "))) "; + OS << T2.str(); + OS << " " << T.str() << ";\n"; + } + if (InIfdef) + OS << "#endif\n"; + OS << "\n"; + + // Emit struct typedefs. + InIfdef = false; + for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { + for (auto &TS : TDTypeVec) { + bool IsA64 = false; + Type T(TS, "."); + if (T.isDouble()) + IsA64 = true; + + if (InIfdef && !IsA64) { + OS << "#endif\n"; + InIfdef = false; + } + if (!InIfdef && IsA64) { + OS << "#ifdef __aarch64__\n"; + InIfdef = true; + } + + const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; + Type VT(TS, Mods); + OS << "typedef struct " << VT.str() << " {\n"; + OS << " " << T.str() << " val"; + OS << "[" << NumMembers << "]"; + OS << ";\n} "; + OS << VT.str() << ";\n"; + OS << "\n"; + } + } + if (InIfdef) + OS << "#endif\n"; +} + /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h /// is comprised of type definitions and function declarations. 
void NeonEmitter::run(raw_ostream &OS) { @@ -2216,6 +2314,11 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "#include <stdint.h>\n\n"; + OS << "#ifdef __ARM_FEATURE_BF16\n"; + OS << "#include <arm_bf16.h>\n"; + OS << "typedef __bf16 bfloat16_t;\n"; + OS << "#endif\n\n"; + // Emit NEON-specific scalar typedefs. OS << "typedef float float32_t;\n"; OS << "typedef __fp16 float16_t;\n"; @@ -2236,74 +2339,11 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "typedef int64_t poly64_t;\n"; OS << "#endif\n"; - // Emit Neon vector typedefs. - std::string TypedefTypes( - "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); - std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); + emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS); - // Emit vector typedefs. - bool InIfdef = false; - for (auto &TS : TDTypeVec) { - bool IsA64 = false; - Type T(TS, "."); - if (T.isDouble()) - IsA64 = true; - - if (InIfdef && !IsA64) { - OS << "#endif\n"; - InIfdef = false; - } - if (!InIfdef && IsA64) { - OS << "#ifdef __aarch64__\n"; - InIfdef = true; - } - - if (T.isPoly()) - OS << "typedef __attribute__((neon_polyvector_type("; - else - OS << "typedef __attribute__((neon_vector_type("; - - Type T2 = T; - T2.makeScalar(); - OS << T.getNumElements() << "))) "; - OS << T2.str(); - OS << " " << T.str() << ";\n"; - } - if (InIfdef) - OS << "#endif\n"; - OS << "\n"; - - // Emit struct typedefs. 
- InIfdef = false; - for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { - for (auto &TS : TDTypeVec) { - bool IsA64 = false; - Type T(TS, "."); - if (T.isDouble()) - IsA64 = true; - - if (InIfdef && !IsA64) { - OS << "#endif\n"; - InIfdef = false; - } - if (!InIfdef && IsA64) { - OS << "#ifdef __aarch64__\n"; - InIfdef = true; - } - - const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; - Type VT(TS, Mods); - OS << "typedef struct " << VT.str() << " {\n"; - OS << " " << T.str() << " val"; - OS << "[" << NumMembers << "]"; - OS << ";\n} "; - OS << VT.str() << ";\n"; - OS << "\n"; - } - } - if (InIfdef) - OS << "#endif\n"; - OS << "\n"; + OS << "#ifdef __ARM_FEATURE_BF16\n"; + emitNeonTypeDefs("bQb", OS); + OS << "#endif\n\n"; OS << "#define __ai static __inline__ __attribute__((__always_inline__, " "__nodebug__))\n\n"; @@ -2470,6 +2510,84 @@ void NeonEmitter::runFP16(raw_ostream &OS) { OS << "#endif /* __ARM_FP16_H */\n"; } +void NeonEmitter::runBF16(raw_ostream &OS) { + OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics " + "-----------------------------------===\n" + " *\n" + " *\n" + " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " + "Exceptions.\n" + " * See https://llvm.org/LICENSE.txt for license information.\n" + " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" + " *\n" + " *===-----------------------------------------------------------------" + "------===\n" + " */\n\n"; + + OS << "#ifndef __ARM_BF16_H\n"; + OS << "#define __ARM_BF16_H\n\n"; + + OS << "typedef __bf16 bfloat16_t;\n"; + + OS << "#define __ai static __inline__ __attribute__((__always_inline__, " + "__nodebug__))\n\n"; + + SmallVector<Intrinsic *, 128> Defs; + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); + for (auto *R : RV) + createIntrinsic(R, Defs); + + for (auto *I : Defs) + I->indexBody(); + + llvm::stable_sort(Defs, llvm::deref<std::less<>>()); + + // Only emit a def when its requirements have been met. 
+ // FIXME: This loop could be made faster, but it's fast enough for now. + bool MadeProgress = true; + std::string InGuard; + while (!Defs.empty() && MadeProgress) { + MadeProgress = false; + + for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); + I != Defs.end(); /*No step*/) { + bool DependenciesSatisfied = true; + for (auto *II : (*I)->getDependencies()) { + if (llvm::is_contained(Defs, II)) + DependenciesSatisfied = false; + } + if (!DependenciesSatisfied) { + // Try the next one. + ++I; + continue; + } + + // Emit #endif/#if pair if needed. + if ((*I)->getGuard() != InGuard) { + if (!InGuard.empty()) + OS << "#endif\n"; + InGuard = (*I)->getGuard(); + if (!InGuard.empty()) + OS << "#if " << InGuard << "\n"; + } + + // Actually generate the intrinsic code. + OS << (*I)->generate(); + + MadeProgress = true; + I = Defs.erase(I); + } + } + assert(Defs.empty() && "Some requirements were not satisfied!"); + if (!InGuard.empty()) + OS << "#endif\n"; + + OS << "\n"; + OS << "#undef __ai\n\n"; + + OS << "#endif\n"; +} + void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).run(OS); } @@ -2478,6 +2596,10 @@ void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).runFP16(OS); } +void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) { + NeonEmitter(Records).runBF16(OS); +} + void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).runHeader(OS); } diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp index 43b59468ec2e..1d6ef8065bb8 100644 --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -63,6 +63,7 @@ enum ActionType { GenClangOpenCLBuiltins, GenArmNeon, GenArmFP16, + GenArmBF16, GenArmNeonSema, GenArmNeonTest, GenArmMveHeader, @@ -186,6 +187,7 @@ cl::opt<ActionType> Action( "Generate OpenCL builtin declaration handlers"), clEnumValN(GenArmNeon, "gen-arm-neon", "Generate arm_neon.h for clang"), 
clEnumValN(GenArmFP16, "gen-arm-fp16", "Generate arm_fp16.h for clang"), + clEnumValN(GenArmBF16, "gen-arm-bf16", "Generate arm_bf16.h for clang"), clEnumValN(GenArmNeonSema, "gen-arm-neon-sema", "Generate ARM NEON sema support for clang"), clEnumValN(GenArmNeonTest, "gen-arm-neon-test", @@ -360,6 +362,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenArmFP16: EmitFP16(Records, OS); break; + case GenArmBF16: + EmitBF16(Records, OS); + break; case GenArmNeonSema: EmitNeonSema(Records, OS); break; diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index 0fd125b40a58..9717903ba52c 100644 --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -85,6 +85,7 @@ void EmitClangOpcodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitNeon(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitFP16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitBF16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitNeonSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitNeonTest(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitNeon2(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits