[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/123614 >From ae09723ecc1cc9bc2cbcef300b05aa2ce5ced448 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 17 Dec 2024 11:42:42 + Subject: [PATCH 1/2] [AArch64] Add FP8 Neon intrinsics for dot-product This patch adds the following intrinsics: float16x4_t vdot_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpm) float16x8_t vdotq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) float16x4_t vdot_lane_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm) float16x4_t vdot_laneq_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm) float16x8_t vdotq_lane_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm) float16x8_t vdotq_laneq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm) [fixup] Remove not needed argument (NFC) [fixup] Update intrinsics declarations [fixup] Add C++ runs to tests, remove some opt passes --- clang/include/clang/Basic/arm_neon.td | 20 ++ clang/include/clang/Basic/arm_neon_incl.td| 2 +- clang/lib/CodeGen/CGBuiltin.cpp | 44 +++ clang/lib/CodeGen/CodeGenFunction.h | 4 + .../fp8-intrinsics/acle_neon_fp8_fdot.c | 254 ++ .../acle_neon_fp8_fdot.c | 54 llvm/include/llvm/IR/IntrinsicsAArch64.td | 21 ++ .../lib/Target/AArch64/AArch64InstrFormats.td | 82 +++--- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 14 +- llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll| 74 + 10 files changed, 529 insertions(+), 40 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c create mode 100644 clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c create mode 100644 llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 
9a6a77640ef5d3..c6609f312969ee 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -2141,6 +2141,26 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in { def VCVTN_F8_F16 : VInst<"vcvt_mf8_f16_fpm", ".(>F)(>F)V", "mQm">; } +let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot2,neon" in { + def VDOT_F16_MF8 : VInst<"vdot_f16_mf8_fpm", "(>F)(>F)..V", "mQm">; + + def VDOT_LANE_F16_MF8 : VInst<"vdot_lane_f16_mf8_fpm", "(>F)(>F)..IV", "m", [ImmCheck<3, ImmCheck0_3, 0>]>; + def VDOT_LANEQ_F16_MF8 : VInst<"vdot_laneq_f16_mf8_fpm", "(>F)(>F).QIV", "m", [ImmCheck<3, ImmCheck0_7, 0>]>; + + def VDOTQ_LANE_F16_MF8 : VInst<"vdot_lane_f16_mf8_fpm", "(>F)(>F).qIV", "Qm", [ImmCheck<3, ImmCheck0_3, 0>]>; + def VDOTQ_LANEQ_F16_MF8 : VInst<"vdot_laneq_f16_mf8_fpm", "(>F)(>F)..IV", "Qm", [ImmCheck<3, ImmCheck0_7, 0>]>; +} + +let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot4,neon" in { + def VDOT_F32_MF8 : VInst<"vdot_f32_mf8_fpm", "(>>F)(>>F)..V", "mQm">; + + def VDOT_LANE_F32_MF8 : VInst<"vdot_lane_f32_mf8_fpm", "(>>F)(>>F)..IV", "m", [ImmCheck<3, ImmCheck0_1, 0>]>; + def VDOT_LANEQ_F32_MF8 : VInst<"vdot_laneq_f32_mf8_fpm", "(>>F)(>>F).QIV", "m", [ImmCheck<3, ImmCheck0_3, 0>]>; + + def VDOTQ_LANE_F32_MF8 : VInst<"vdot_lane_f32_mf8_fpm", "(>>F)(>>F).qIV", "Qm", [ImmCheck<3, ImmCheck0_1, 0>]>; + def VDOTQ_LANEQ_F32_MF8 : VInst<"vdot_laneq_f32_mf8_fpm", "(>>F)(>>F)..IV", "Qm", [ImmCheck<3, ImmCheck0_3, 0>]>; +} + let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in { def FAMIN : WInst<"vamin", "...", "fhQdQfQh">; def FAMAX : WInst<"vamax", "...", "fhQdQfQh">; diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td index 91a2bf3020b9a3..b9b9d509c22512 100644 --- a/clang/include/clang/Basic/arm_neon_incl.td +++ b/clang/include/clang/Basic/arm_neon_incl.td @@ -302,7 +302,7 @@ class Inst ch = []>{ class SInst ch = []> : Inst {} class IInst 
ch = []> : Inst {} class WInst ch = []> : Inst {} -class VInst : Inst {} +class VInst ch = []> : Inst {} // The following instruction classes are implemented via operators // instead of builtins. As such these declarations are only used for diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0a06ce028a9160..b4b26eb84d5f92 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6766,6 +6766,24 @@ Value *CodeGenFunction::EmitFP8NeonCall(Function *F, return EmitNeonCall(F, Ops, name); } +llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall( +unsigned IID, bool ExtendLane, llvm::Type *RetTy, +SmallVectorImpl &Ops, const CallExpr *E, const char *name) { + + const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)
https://github.com/jthackray approved this pull request. Déjà vu, LGTM. https://github.com/llvm/llvm-project/pull/123614 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)
llvmbot wrote: @llvm/pr-subscribers-clang-codegen Author: Momchil Velikov (momchil-velikov) Changes This patch adds the following intrinsics: float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) Supersedes https://github.com/llvm/llvm-project/pull/120273 --- Patch is 160.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123614.diff 37 Files Affected: - (modified) clang/include/clang/AST/Type.h (+5) - (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+17-20) - (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1) - (modified) clang/include/clang/Basic/arm_neon.td (+52) - (modified) clang/include/clang/Basic/arm_neon_incl.td (+2) - (modified) clang/lib/AST/ASTContext.cpp (+18-12) - (modified) clang/lib/AST/ItaniumMangle.cpp (+6-1) - (modified) clang/lib/AST/Type.cpp (+1-3) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+147) - (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2) - (modified) clang/lib/CodeGen/CodeGenFunction.h (+11) - (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+13-5) - (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+6-8) - (modified) clang/lib/Sema/SemaARM.cpp (+2) - (modified) clang/lib/Sema/SemaExpr.cpp (+6-1) - (modified) clang/lib/Sema/SemaType.cpp (+2-1) - (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123) - (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193) - (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+316) - (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+254) - (added) 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c (+121) - (modified) clang/test/CodeGen/arm-mfp8.c (+53-35) - (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7) - (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11) - (added) clang/test/Sema/aarch64-fp8-cast.c (+104) - (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c (+43) - (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c (+54) - (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c (+22) - (modified) clang/test/Sema/arm-mfp8.cpp (+22-12) - (modified) clang/utils/TableGen/NeonEmitter.cpp (+23-9) - (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+60) - (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+88-49) - (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+21-21) - (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74) - (added) llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll (+56) - (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112) ``diff diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 3457d524c63aaa..1d9743520654eb 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isFloat32Type() const; bool isDoubleType() const; bool isBFloat16Type() const; + bool isMFloat8Type() const; bool isFloat128Type() const; bool isIbm128Type() const; bool isRealType() const; // C99 6.2.5p17 (real floating + integer) @@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const { return isSpecificBuiltinType(BuiltinType::BFloat16); } +inline bool Type::isMFloat8Type() const { + return isSpecificBuiltinType(BuiltinType::MFloat8); +} + inline bool Type::isFloat128Type() const { return isSpecificBuiltinType(BuiltinType::Float128); } diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def 
b/clang/include/clang/Basic/AArch64SVEACLETypes.def index 063cac1f4a58ee..a408bb0c54057c 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -57,6 +57,11 @@ // - IsBF true for vector of brain float elements. //===--===// +#ifndef SVE_SCALAR_TYPE +#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \ + SVE_TYPE(Name, Id, SingletonId) +#endif + #ifndef SVE_VECTOR_TYPE #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \ SVE_TYPE(Name, Id, SingletonId) @@ -72,6 +77,11 @@ SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, true) #endif +#ifndef SVE_VECTOR_TYPE_MFLOAT +#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \ + S
[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)
llvmbot wrote: @llvm/pr-subscribers-backend-arm Author: Momchil Velikov (momchil-velikov) Changes This patch adds the following intrinsics: float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) Supersedes https://github.com/llvm/llvm-project/pull/120273 --- Patch is 160.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123614.diff 37 Files Affected: - (modified) clang/include/clang/AST/Type.h (+5) - (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+17-20) - (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1) - (modified) clang/include/clang/Basic/arm_neon.td (+52) - (modified) clang/include/clang/Basic/arm_neon_incl.td (+2) - (modified) clang/lib/AST/ASTContext.cpp (+18-12) - (modified) clang/lib/AST/ItaniumMangle.cpp (+6-1) - (modified) clang/lib/AST/Type.cpp (+1-3) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+147) - (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2) - (modified) clang/lib/CodeGen/CodeGenFunction.h (+11) - (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+13-5) - (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+6-8) - (modified) clang/lib/Sema/SemaARM.cpp (+2) - (modified) clang/lib/Sema/SemaExpr.cpp (+6-1) - (modified) clang/lib/Sema/SemaType.cpp (+2-1) - (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123) - (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193) - (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+316) - (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+254) - (added) 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c (+121) - (modified) clang/test/CodeGen/arm-mfp8.c (+53-35) - (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7) - (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11) - (added) clang/test/Sema/aarch64-fp8-cast.c (+104) - (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c (+43) - (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c (+54) - (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c (+22) - (modified) clang/test/Sema/arm-mfp8.cpp (+22-12) - (modified) clang/utils/TableGen/NeonEmitter.cpp (+23-9) - (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+60) - (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+88-49) - (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+21-21) - (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74) - (added) llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll (+56) - (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112) ``diff diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 3457d524c63aaa..1d9743520654eb 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isFloat32Type() const; bool isDoubleType() const; bool isBFloat16Type() const; + bool isMFloat8Type() const; bool isFloat128Type() const; bool isIbm128Type() const; bool isRealType() const; // C99 6.2.5p17 (real floating + integer) @@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const { return isSpecificBuiltinType(BuiltinType::BFloat16); } +inline bool Type::isMFloat8Type() const { + return isSpecificBuiltinType(BuiltinType::MFloat8); +} + inline bool Type::isFloat128Type() const { return isSpecificBuiltinType(BuiltinType::Float128); } diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def 
b/clang/include/clang/Basic/AArch64SVEACLETypes.def index 063cac1f4a58ee..a408bb0c54057c 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -57,6 +57,11 @@ // - IsBF true for vector of brain float elements. //===--===// +#ifndef SVE_SCALAR_TYPE +#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \ + SVE_TYPE(Name, Id, SingletonId) +#endif + #ifndef SVE_VECTOR_TYPE #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \ SVE_TYPE(Name, Id, SingletonId) @@ -72,6 +77,11 @@ SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, true) #endif +#ifndef SVE_VECTOR_TYPE_MFLOAT +#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \ + SVE
[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Momchil Velikov (momchil-velikov) Changes This patch adds the following intrinsics: float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t) Supersedes https://github.com/llvm/llvm-project/pull/120273 --- Patch is 160.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123614.diff 37 Files Affected: - (modified) clang/include/clang/AST/Type.h (+5) - (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+17-20) - (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1) - (modified) clang/include/clang/Basic/arm_neon.td (+52) - (modified) clang/include/clang/Basic/arm_neon_incl.td (+2) - (modified) clang/lib/AST/ASTContext.cpp (+18-12) - (modified) clang/lib/AST/ItaniumMangle.cpp (+6-1) - (modified) clang/lib/AST/Type.cpp (+1-3) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+147) - (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2) - (modified) clang/lib/CodeGen/CodeGenFunction.h (+11) - (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+13-5) - (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+6-8) - (modified) clang/lib/Sema/SemaARM.cpp (+2) - (modified) clang/lib/Sema/SemaExpr.cpp (+6-1) - (modified) clang/lib/Sema/SemaType.cpp (+2-1) - (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123) - (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193) - (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+316) - (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+254) - (added) 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c (+121) - (modified) clang/test/CodeGen/arm-mfp8.c (+53-35) - (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7) - (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11) - (added) clang/test/Sema/aarch64-fp8-cast.c (+104) - (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c (+43) - (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c (+54) - (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c (+22) - (modified) clang/test/Sema/arm-mfp8.cpp (+22-12) - (modified) clang/utils/TableGen/NeonEmitter.cpp (+23-9) - (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+60) - (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+88-49) - (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+21-21) - (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74) - (added) llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll (+56) - (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112) ``diff diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 3457d524c63aaa..1d9743520654eb 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isFloat32Type() const; bool isDoubleType() const; bool isBFloat16Type() const; + bool isMFloat8Type() const; bool isFloat128Type() const; bool isIbm128Type() const; bool isRealType() const; // C99 6.2.5p17 (real floating + integer) @@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const { return isSpecificBuiltinType(BuiltinType::BFloat16); } +inline bool Type::isMFloat8Type() const { + return isSpecificBuiltinType(BuiltinType::MFloat8); +} + inline bool Type::isFloat128Type() const { return isSpecificBuiltinType(BuiltinType::Float128); } diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def 
b/clang/include/clang/Basic/AArch64SVEACLETypes.def index 063cac1f4a58ee..a408bb0c54057c 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -57,6 +57,11 @@ // - IsBF true for vector of brain float elements. //===--===// +#ifndef SVE_SCALAR_TYPE +#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \ + SVE_TYPE(Name, Id, SingletonId) +#endif + #ifndef SVE_VECTOR_TYPE #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \ SVE_TYPE(Name, Id, SingletonId) @@ -72,6 +77,11 @@ SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, true) #endif +#ifndef SVE_VECTOR_TYPE_MFLOAT +#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \ + SVE_VECTO