[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)

2025-01-27 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/123614

>From ae09723ecc1cc9bc2cbcef300b05aa2ce5ced448 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Tue, 17 Dec 2024 11:42:42 +
Subject: [PATCH 1/2] [AArch64] Add FP8 Neon intrinsics for dot-product

THis patch adds the following intrinsics:

float16x4_t vdot_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, 
fpm_t fpm)
float16x8_t vdotq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, 
fpm_t fpm)

float16x4_t vdot_lane_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t 
vm, __builtin_constant_p(lane), fpm_t fpm)
float16x4_t vdot_laneq_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x16_t 
vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vdotq_lane_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t 
vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vdotq_laneq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, 
mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)

[fixup] Remove not needed argument (NFC)

[fixup] Update intrinsics declarations

[fixup] Add C++ runs to tests, remove some opt passes
---
 clang/include/clang/Basic/arm_neon.td |  20 ++
 clang/include/clang/Basic/arm_neon_incl.td|   2 +-
 clang/lib/CodeGen/CGBuiltin.cpp   |  44 +++
 clang/lib/CodeGen/CodeGenFunction.h   |   4 +
 .../fp8-intrinsics/acle_neon_fp8_fdot.c   | 254 ++
 .../acle_neon_fp8_fdot.c  |  54 
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  21 ++
 .../lib/Target/AArch64/AArch64InstrFormats.td |  82 +++---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  14 +-
 llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll|  74 +
 10 files changed, 529 insertions(+), 40 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
 create mode 100644 clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c
 create mode 100644 llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll

diff --git a/clang/include/clang/Basic/arm_neon.td 
b/clang/include/clang/Basic/arm_neon.td
index 9a6a77640ef5d3..c6609f312969ee 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -2141,6 +2141,26 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = 
"fp8,neon" in {
   def VCVTN_F8_F16  : VInst<"vcvt_mf8_f16_fpm",  ".(>F)(>F)V",  
"mQm">;
 }
 
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot2,neon" in {
+  def VDOT_F16_MF8 : VInst<"vdot_f16_mf8_fpm", "(>F)(>F)..V", "mQm">;
+
+  def VDOT_LANE_F16_MF8  : VInst<"vdot_lane_f16_mf8_fpm",  "(>F)(>F)..IV", 
"m", [ImmCheck<3, ImmCheck0_3, 0>]>;
+  def VDOT_LANEQ_F16_MF8 : VInst<"vdot_laneq_f16_mf8_fpm", "(>F)(>F).QIV", 
"m", [ImmCheck<3, ImmCheck0_7, 0>]>;
+
+  def VDOTQ_LANE_F16_MF8  : VInst<"vdot_lane_f16_mf8_fpm",  "(>F)(>F).qIV", 
"Qm", [ImmCheck<3, ImmCheck0_3, 0>]>;
+  def VDOTQ_LANEQ_F16_MF8 : VInst<"vdot_laneq_f16_mf8_fpm", "(>F)(>F)..IV", 
"Qm", [ImmCheck<3, ImmCheck0_7, 0>]>;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot4,neon" in {
+  def VDOT_F32_MF8 : VInst<"vdot_f32_mf8_fpm", "(>>F)(>>F)..V", "mQm">;
+
+  def VDOT_LANE_F32_MF8  : VInst<"vdot_lane_f32_mf8_fpm",  "(>>F)(>>F)..IV", 
"m", [ImmCheck<3, ImmCheck0_1, 0>]>;
+  def VDOT_LANEQ_F32_MF8 : VInst<"vdot_laneq_f32_mf8_fpm", "(>>F)(>>F).QIV", 
"m", [ImmCheck<3, ImmCheck0_3, 0>]>;
+
+  def VDOTQ_LANE_F32_MF8  : VInst<"vdot_lane_f32_mf8_fpm",  "(>>F)(>>F).qIV", 
"Qm", [ImmCheck<3, ImmCheck0_1, 0>]>;
+  def VDOTQ_LANEQ_F32_MF8 : VInst<"vdot_laneq_f32_mf8_fpm", "(>>F)(>>F)..IV", 
"Qm", [ImmCheck<3, ImmCheck0_3, 0>]>;
+}
+
 let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in {
   def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
   def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
diff --git a/clang/include/clang/Basic/arm_neon_incl.td 
b/clang/include/clang/Basic/arm_neon_incl.td
index 91a2bf3020b9a3..b9b9d509c22512 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -302,7 +302,7 @@ class Inst  ch = []>{
 class SInst ch = []> : Inst {}
 class IInst ch = []> : Inst {}
 class WInst ch = []> : Inst {}
-class VInst : Inst {}
+class VInst ch = []> : Inst {}
 
 // The following instruction classes are implemented via operators
 // instead of builtins. As such these declarations are only used for
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0a06ce028a9160..b4b26eb84d5f92 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6766,6 +6766,24 @@ Value *CodeGenFunction::EmitFP8NeonCall(Function *F,
   return EmitNeonCall(F, Ops, name);
 }
 
+llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
+unsigned IID, bool ExtendLane, llvm::Type *RetTy,
+SmallVectorImpl &Ops, const CallExpr *E, const char *name) {
+
+  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /

[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)

2025-01-22 Thread Jonathan Thackray via cfe-commits

https://github.com/jthackray approved this pull request.

Déjà vu, LGTM.

https://github.com/llvm/llvm-project/pull/123614
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)

2025-01-20 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-codegen

Author: Momchil Velikov (momchil-velikov)


Changes

This patch adds the following intrinsics:

float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)

float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)

Supersedes https://github.com/llvm/llvm-project/pull/120273

---

Patch is 160.53 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/123614.diff


37 Files Affected:

- (modified) clang/include/clang/AST/Type.h (+5) 
- (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+17-20) 
- (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1) 
- (modified) clang/include/clang/Basic/arm_neon.td (+52) 
- (modified) clang/include/clang/Basic/arm_neon_incl.td (+2) 
- (modified) clang/lib/AST/ASTContext.cpp (+18-12) 
- (modified) clang/lib/AST/ItaniumMangle.cpp (+6-1) 
- (modified) clang/lib/AST/Type.cpp (+1-3) 
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+147) 
- (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2) 
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+11) 
- (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+13-5) 
- (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+6-8) 
- (modified) clang/lib/Sema/SemaARM.cpp (+2) 
- (modified) clang/lib/Sema/SemaExpr.cpp (+6-1) 
- (modified) clang/lib/Sema/SemaType.cpp (+2-1) 
- (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123) 
- (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+316) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+254) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c (+121) 
- (modified) clang/test/CodeGen/arm-mfp8.c (+53-35) 
- (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7) 
- (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11) 
- (added) clang/test/Sema/aarch64-fp8-cast.c (+104) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c (+43) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c (+54) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c (+22) 
- (modified) clang/test/Sema/arm-mfp8.cpp (+22-12) 
- (modified) clang/utils/TableGen/NeonEmitter.cpp (+23-9) 
- (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+60) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+88-49) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+21-21) 
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74) 
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll (+56) 
- (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112) 


``diff
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 3457d524c63aaa..1d9743520654eb 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public 
ExtQualsTypeCommonBase {
   bool isFloat32Type() const;
   bool isDoubleType() const;
   bool isBFloat16Type() const;
+  bool isMFloat8Type() const;
   bool isFloat128Type() const;
   bool isIbm128Type() const;
   bool isRealType() const; // C99 6.2.5p17 (real floating + integer)
@@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const {
   return isSpecificBuiltinType(BuiltinType::BFloat16);
 }
 
+inline bool Type::isMFloat8Type() const {
+  return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
 inline bool Type::isFloat128Type() const {
   return isSpecificBuiltinType(BuiltinType::Float128);
 }
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def 
b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee..a408bb0c54057c 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -57,6 +57,11 @@
 //  - IsBF true for vector of brain float elements.
 
//===--===//
 
+#ifndef SVE_SCALAR_TYPE
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+  SVE_TYPE(Name, Id, SingletonId)
+#endif
+
 #ifndef SVE_VECTOR_TYPE
 #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
   SVE_TYPE(Name, Id, SingletonId)
@@ -72,6 +77,11 @@
   SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, 
NF, false, false, true)
 #endif
 
+#ifndef SVE_VECTOR_TYPE_MFLOAT
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, 
ElBits, NF) \
+  S

[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)

2025-01-20 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-arm

Author: Momchil Velikov (momchil-velikov)


Changes

This patch adds the following intrinsics:

float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)

float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)

Supersedes https://github.com/llvm/llvm-project/pull/120273

---

Patch is 160.53 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/123614.diff


37 Files Affected:

- (modified) clang/include/clang/AST/Type.h (+5) 
- (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+17-20) 
- (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1) 
- (modified) clang/include/clang/Basic/arm_neon.td (+52) 
- (modified) clang/include/clang/Basic/arm_neon_incl.td (+2) 
- (modified) clang/lib/AST/ASTContext.cpp (+18-12) 
- (modified) clang/lib/AST/ItaniumMangle.cpp (+6-1) 
- (modified) clang/lib/AST/Type.cpp (+1-3) 
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+147) 
- (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2) 
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+11) 
- (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+13-5) 
- (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+6-8) 
- (modified) clang/lib/Sema/SemaARM.cpp (+2) 
- (modified) clang/lib/Sema/SemaExpr.cpp (+6-1) 
- (modified) clang/lib/Sema/SemaType.cpp (+2-1) 
- (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123) 
- (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+316) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+254) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c (+121) 
- (modified) clang/test/CodeGen/arm-mfp8.c (+53-35) 
- (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7) 
- (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11) 
- (added) clang/test/Sema/aarch64-fp8-cast.c (+104) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c (+43) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c (+54) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c (+22) 
- (modified) clang/test/Sema/arm-mfp8.cpp (+22-12) 
- (modified) clang/utils/TableGen/NeonEmitter.cpp (+23-9) 
- (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+60) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+88-49) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+21-21) 
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74) 
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll (+56) 
- (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112) 


``diff
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 3457d524c63aaa..1d9743520654eb 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public 
ExtQualsTypeCommonBase {
   bool isFloat32Type() const;
   bool isDoubleType() const;
   bool isBFloat16Type() const;
+  bool isMFloat8Type() const;
   bool isFloat128Type() const;
   bool isIbm128Type() const;
   bool isRealType() const; // C99 6.2.5p17 (real floating + integer)
@@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const {
   return isSpecificBuiltinType(BuiltinType::BFloat16);
 }
 
+inline bool Type::isMFloat8Type() const {
+  return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
 inline bool Type::isFloat128Type() const {
   return isSpecificBuiltinType(BuiltinType::Float128);
 }
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def 
b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee..a408bb0c54057c 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -57,6 +57,11 @@
 //  - IsBF true for vector of brain float elements.
 
//===--===//
 
+#ifndef SVE_SCALAR_TYPE
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+  SVE_TYPE(Name, Id, SingletonId)
+#endif
+
 #ifndef SVE_VECTOR_TYPE
 #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
   SVE_TYPE(Name, Id, SingletonId)
@@ -72,6 +77,11 @@
   SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, 
NF, false, false, true)
 #endif
 
+#ifndef SVE_VECTOR_TYPE_MFLOAT
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, 
ElBits, NF) \
+  SVE

[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)

2025-01-20 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Momchil Velikov (momchil-velikov)


Changes

This patch adds the following intrinsics:

float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)

float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)
float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, 
fpm_t)

Supersedes https://github.com/llvm/llvm-project/pull/120273

---

Patch is 160.53 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/123614.diff


37 Files Affected:

- (modified) clang/include/clang/AST/Type.h (+5) 
- (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+17-20) 
- (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1) 
- (modified) clang/include/clang/Basic/arm_neon.td (+52) 
- (modified) clang/include/clang/Basic/arm_neon_incl.td (+2) 
- (modified) clang/lib/AST/ASTContext.cpp (+18-12) 
- (modified) clang/lib/AST/ItaniumMangle.cpp (+6-1) 
- (modified) clang/lib/AST/Type.cpp (+1-3) 
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+147) 
- (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2) 
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+11) 
- (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+13-5) 
- (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+6-8) 
- (modified) clang/lib/Sema/SemaARM.cpp (+2) 
- (modified) clang/lib/Sema/SemaExpr.cpp (+6-1) 
- (modified) clang/lib/Sema/SemaType.cpp (+2-1) 
- (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123) 
- (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+316) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+254) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c (+121) 
- (modified) clang/test/CodeGen/arm-mfp8.c (+53-35) 
- (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7) 
- (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11) 
- (added) clang/test/Sema/aarch64-fp8-cast.c (+104) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c (+43) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c (+54) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c (+22) 
- (modified) clang/test/Sema/arm-mfp8.cpp (+22-12) 
- (modified) clang/utils/TableGen/NeonEmitter.cpp (+23-9) 
- (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+60) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+88-49) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+21-21) 
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74) 
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll (+56) 
- (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112) 


``diff
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 3457d524c63aaa..1d9743520654eb 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public 
ExtQualsTypeCommonBase {
   bool isFloat32Type() const;
   bool isDoubleType() const;
   bool isBFloat16Type() const;
+  bool isMFloat8Type() const;
   bool isFloat128Type() const;
   bool isIbm128Type() const;
   bool isRealType() const; // C99 6.2.5p17 (real floating + integer)
@@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const {
   return isSpecificBuiltinType(BuiltinType::BFloat16);
 }
 
+inline bool Type::isMFloat8Type() const {
+  return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
 inline bool Type::isFloat128Type() const {
   return isSpecificBuiltinType(BuiltinType::Float128);
 }
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def 
b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee..a408bb0c54057c 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -57,6 +57,11 @@
 //  - IsBF true for vector of brain float elements.
 
//===--===//
 
+#ifndef SVE_SCALAR_TYPE
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+  SVE_TYPE(Name, Id, SingletonId)
+#endif
+
 #ifndef SVE_VECTOR_TYPE
 #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
   SVE_TYPE(Name, Id, SingletonId)
@@ -72,6 +77,11 @@
   SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, 
NF, false, false, true)
 #endif
 
+#ifndef SVE_VECTOR_TYPE_MFLOAT
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, 
ElBits, NF) \
+  SVE_VECTO