https://github.com/paulwalker-arm updated 
https://github.com/llvm/llvm-project/pull/204201

>From b9544c102502bc55bf1a17ba63cb77710e4b50cd Mon Sep 17 00:00:00 2001
From: Paul Walker <[email protected]>
Date: Tue, 16 Jun 2026 16:33:13 +0100
Subject: [PATCH 1/2] [Clang][NEON ACLE] Remove +bf16 requirement from opaque
 bfloat builtins.

Builtins that only care about the size of the element type but not
its format (e.g. loads, stores and shuffles) do not require any special
instructions for code generation beyond those already available with +neon.
---
 clang/include/clang/Basic/arm_neon.td         | 15 +++--
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      | 56 -------------------
 .../CodeGen/AArch64/bf16-getset-intrinsics.c  |  2 +-
 .../CodeGen/AArch64/bf16-lane-intrinsics.c    |  4 +-
 .../CodeGen/AArch64/bf16-ldst-intrinsics.c    |  4 +-
 .../AArch64/bf16-reinterpret-intrinsics.c     |  2 +-
 .../CodeGen/arm-bf16-reinterpret-intrinsics.c |  2 +-
 clang/test/Sema/aarch64-neon-target.c         |  3 -
 .../aarch64-neon-without-target-feature.cpp   |  4 +-
 clang/test/Sema/arm-neon-target.c             |  3 -
 10 files changed, 19 insertions(+), 76 deletions(-)

diff --git a/clang/include/clang/Basic/arm_neon.td 
b/clang/include/clang/Basic/arm_neon.td
index 3bf140ff953b9..dda6ff59179bb 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -285,7 +285,7 @@ def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
                    
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPlmQm",
                    [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
-let TargetGuard = "bf16,neon" in {
+let TargetGuard = "neon" in {
   def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb",
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb",
@@ -2024,8 +2024,7 @@ let ArchGuard = "defined(__aarch64__) || 
defined(__arm64ec__)", TargetGuard = "v
   def VCMLAQ_ROT270_FP64 : SInst<"vcmlaq_rot270", "QQQQ", "d">;
 }
 
-// V8.2-A BFloat intrinsics
-let TargetGuard = "bf16,neon" in {
+let TargetGuard = "neon" in {
   def VCREATE_BF : NoTestOpInst<"vcreate", ".(IU>)", "b", OP_CAST> {
     let BigEndianSafe = 1;
   }
@@ -2088,7 +2087,11 @@ let TargetGuard = "bf16,neon" in {
   def VLD2_DUP_BF : WInst<"vld2_dup", "2(c*!)", "bQb">;
   def VLD3_DUP_BF : WInst<"vld3_dup", "3(c*!)", "bQb">;
   def VLD4_DUP_BF : WInst<"vld4_dup", "4(c*!)", "bQb">;
+}
+
+// V8.2-A BFloat intrinsics
 
+let TargetGuard = "bf16,neon" in {
   def VCVT_F32_BF16 : SOpInst<"vcvt_f32_bf16", "(F>)(Bq!)",  "Qb", 
OP_VCVT_F32_BF16>;
   def VCVT_LOW_F32_BF16 : SOpInst<"vcvt_low_f32", "(F>)(BQ!)",  "Qb", 
OP_VCVT_F32_BF16_LO>;
   def VCVT_HIGH_F32_BF16 : SOpInst<"vcvt_high_f32", "(F>)(BQ!)", "Qb", 
OP_VCVT_F32_BF16_HI>;
@@ -2108,21 +2111,23 @@ let ArchGuard = "defined(__aarch64__) || 
defined(__arm64ec__)", TargetGuard = "b
   def VCVT_LOW_BF16_F32_A64 : SInst<"vcvt_low_bf16", "BQ", "Qf">;
   def VCVT_HIGH_BF16_F32_A64 : SInst<"vcvt_high_bf16", "BBQ", "Qf">;
   def VCVT_BF16_F32 : SInst<"vcvt_bf16", "BQ", "f">;
+}
 
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = 
"neon" in {
   def COPY_LANE_BF16 : IOpInst<"vcopy_lane", "..I.I", "b", OP_COPY_LN>;
   def COPYQ_LANE_BF16 : IOpInst<"vcopy_lane", "..IqI", "Qb", OP_COPY_LN>;
   def COPY_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..IQI", "b", OP_COPY_LN>;
   def COPYQ_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..I.I", "Qb", OP_COPY_LN>;
 }
 
-let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard 
= "bf16,neon" in {
+let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard 
= "neon" in {
   let BigEndianSafe = 1 in {
     defm VREINTERPRET_BF : REINTERPRET_CROSS_TYPES<
         "csilUcUsUiUlhfPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQPcQPsQPl", "bQb">;
   }
 }
 
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = 
"bf16,neon" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = 
"neon" in {
   let BigEndianSafe = 1 in {
     defm VVREINTERPRET_BF : REINTERPRET_CROSS_TYPES<
         "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", "bQb">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 6d6f87a9439df..e6b18405c7e35 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -858,10 +858,6 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] 
= {
 
 // Some intrinsics are equivalent for codegen.
 static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
-  { NEON::BI__builtin_neon_splat_lane_bf16, 
NEON::BI__builtin_neon_splat_lane_v, },
-  { NEON::BI__builtin_neon_splat_laneq_bf16, 
NEON::BI__builtin_neon_splat_laneq_v, },
-  { NEON::BI__builtin_neon_splatq_lane_bf16, 
NEON::BI__builtin_neon_splatq_lane_v, },
-  { NEON::BI__builtin_neon_splatq_laneq_bf16, 
NEON::BI__builtin_neon_splatq_laneq_v, },
   { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
   { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
   { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
@@ -890,36 +886,6 @@ static const std::pair<unsigned, unsigned> 
NEONEquivalentIntrinsicMap[] = {
   { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
   { NEON::BI__builtin_neon_vfmaq_lane_f16, 
NEON::BI__builtin_neon_vfmaq_lane_v, },
   { NEON::BI__builtin_neon_vfmaq_laneq_f16, 
NEON::BI__builtin_neon_vfmaq_laneq_v, },
-  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
-  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
-  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
-  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
-  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
-  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v 
},
-  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
-  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
-  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
-  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
-  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v 
},
-  { NEON::BI__builtin_neon_vld1q_lane_bf16, 
NEON::BI__builtin_neon_vld1q_lane_v },
-  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
-  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
-  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v 
},
-  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
-  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v 
},
-  { NEON::BI__builtin_neon_vld2q_lane_bf16, 
NEON::BI__builtin_neon_vld2q_lane_v },
-  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
-  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
-  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v 
},
-  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
-  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v 
},
-  { NEON::BI__builtin_neon_vld3q_lane_bf16, 
NEON::BI__builtin_neon_vld3q_lane_v },
-  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
-  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
-  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v 
},
-  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
-  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v 
},
-  { NEON::BI__builtin_neon_vld4q_lane_bf16, 
NEON::BI__builtin_neon_vld4q_lane_v },
   { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
   { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
   { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
@@ -964,28 +930,6 @@ static const std::pair<unsigned, unsigned> 
NEONEquivalentIntrinsicMap[] = {
   { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
   { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
   { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
-  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
-  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
-  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
-  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
-  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v 
},
-  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
-  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
-  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
-  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
-  { NEON::BI__builtin_neon_vst1q_lane_bf16, 
NEON::BI__builtin_neon_vst1q_lane_v },
-  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
-  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v 
},
-  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
-  { NEON::BI__builtin_neon_vst2q_lane_bf16, 
NEON::BI__builtin_neon_vst2q_lane_v },
-  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
-  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v 
},
-  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
-  { NEON::BI__builtin_neon_vst3q_lane_bf16, 
NEON::BI__builtin_neon_vst3q_lane_v },
-  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
-  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v 
},
-  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
-  { NEON::BI__builtin_neon_vst4q_lane_bf16, 
NEON::BI__builtin_neon_vst4q_lane_v },
   // The mangling rules cause us to have one ID for each type for 
vldap1(q)_lane
   // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
   // arbitrary one to be handled as tha canonical variation.
diff --git a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c 
b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
index c93e3ca31896c..a0ea29cff0a08 100644
--- a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN:  %clang_cc1_cg_arm64_neon -target-feature +bf16  -emit-llvm %s 
-disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN:  %clang_cc1_cg_arm64_neon -emit-llvm %s -disable-O0-optnone | opt -S 
-passes=mem2reg,sroa | FileCheck %s
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
diff --git a/clang/test/CodeGen/AArch64/bf16-lane-intrinsics.c 
b/clang/test/CodeGen/AArch64/bf16-lane-intrinsics.c
index ccd6d17412a8b..c8212908315f5 100644
--- a/clang/test/CodeGen/AArch64/bf16-lane-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/bf16-lane-intrinsics.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 
\
+// RUN: %clang_cc1 -triple aarch64 -target-feature +neon \
 // RUN:  -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | 
FileCheck --check-prefix=CHECK-LE %s
-// RUN: %clang_cc1 -triple aarch64_be -target-feature +neon -target-feature 
+bf16 \
+// RUN: %clang_cc1 -triple aarch64_be -target-feature +neon \
 // RUN:  -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | 
FileCheck --check-prefix=CHECK-BE %s
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
diff --git a/clang/test/CodeGen/AArch64/bf16-ldst-intrinsics.c 
b/clang/test/CodeGen/AArch64/bf16-ldst-intrinsics.c
index 5d778e3b51d0e..44ddd578d81f2 100644
--- a/clang/test/CodeGen/AArch64/bf16-ldst-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/bf16-ldst-intrinsics.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 
\
+// RUN: %clang_cc1 -triple aarch64 -target-feature +neon \
 // RUN:  -O2 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK64
-// RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon 
-target-feature +bf16 -mfloat-abi hard \
+// RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon 
-mfloat-abi hard \
 // RUN:  -O2 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK32
 
 // REQUIRES: arm-registered-target,aarch64-registered-target
diff --git a/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c 
b/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c
index 88f2305e2782c..007a0b1b32b9e 100644
--- a/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 
\
+// RUN: %clang_cc1 -triple aarch64 -target-feature +neon \
 // RUN: -disable-O0-optnone -emit-llvm -o - %s \
 // RUN: | opt -S -passes=mem2reg,sroa \
 // RUN: | FileCheck %s
diff --git a/clang/test/CodeGen/arm-bf16-reinterpret-intrinsics.c 
b/clang/test/CodeGen/arm-bf16-reinterpret-intrinsics.c
index f8c3a94133131..de04466b3bce0 100644
--- a/clang/test/CodeGen/arm-bf16-reinterpret-intrinsics.c
+++ b/clang/test/CodeGen/arm-bf16-reinterpret-intrinsics.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple armv8.2a-arm-none-eabi -target-feature +neon 
-target-feature +bf16 -mfloat-abi hard \
+// RUN: %clang_cc1 -triple armv8.2a-arm-none-eabi -target-feature +neon 
-mfloat-abi hard \
 // RUN: -disable-O0-optnone -emit-llvm -o - %s \
 // RUN: | opt -S -passes=instcombine \
 // RUN: | FileCheck %s
diff --git a/clang/test/Sema/aarch64-neon-target.c 
b/clang/test/Sema/aarch64-neon-target.c
index ff1928832862d..6174a7d0a0694 100644
--- a/clang/test/Sema/aarch64-neon-target.c
+++ b/clang/test/Sema/aarch64-neon-target.c
@@ -93,9 +93,6 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t 
v8i16, uint8x16_t
   // bf16
   vbfdot_f32(v2f32, v4bf16, v4bf16); // expected-error {{always_inline 
function 'vbfdot_f32' requires target feature 'bf16'}}
   vcreate_bf16(10);
-  vdup_lane_bf16(v4bf16, 2); // expected-error 
{{'__builtin_neon_splat_lane_bf16' needs target feature bf16}}
-  vdup_n_bf16(bf16); // expected-error {{always_inline function 'vdup_n_bf16' 
requires target feature 'bf16'}}
-  vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_bf16' needs target 
feature bf16}}
   vcvt_f32_bf16(v4bf16); // expected-error {{always_inline function 
'vcvt_f32_bf16' requires target feature 'bf16'}}
   vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 
'vcvt_bf16_f32' requires target feature 'bf16'}}
   // f16mm / f16f32mm
diff --git a/clang/test/Sema/aarch64-neon-without-target-feature.cpp 
b/clang/test/Sema/aarch64-neon-without-target-feature.cpp
index 86dbb343198c5..97c01e0f51f5e 100644
--- a/clang/test/Sema/aarch64-neon-without-target-feature.cpp
+++ b/clang/test/Sema/aarch64-neon-without-target-feature.cpp
@@ -23,9 +23,9 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t 
v8i16, uint8x16_t
   // bf16
   vbfdot_f32(v2f32, v4bf16, v4bf16); // expected-error {{always_inline 
function 'vbfdot_f32' requires target feature 'neon'}}
   vcreate_bf16(10);
-  vdup_lane_bf16(v4bf16, 2); // expected-error 
{{'__builtin_neon_splat_lane_bf16' needs target feature bf16,neon}}
+  vdup_lane_bf16(v4bf16, 2); // expected-error {{'__builtin_neon_splat_lane_v' 
needs target feature neon}}
   vdup_n_bf16(bf16); // expected-error {{always_inline function 'vdup_n_bf16' 
requires target feature 'neon'}}
-  vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_bf16' needs target 
feature bf16,neon}}
+  vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_v' needs target 
feature neon}}
   vcvt_f32_bf16(v4bf16); // expected-error {{always_inline function 
'vcvt_f32_bf16' requires target feature 'neon'}}
   vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 
'vcvt_bf16_f32' requires target feature 'neon'}}
   vmmlaq_f16_f16(v8f16, v8f16, v8f16); // expected-error {{always_inline 
function 'vmmlaq_f16_f16' requires target feature 'neon'}}
diff --git a/clang/test/Sema/arm-neon-target.c 
b/clang/test/Sema/arm-neon-target.c
index 1dc2b00925d61..f8d2da4aecc45 100644
--- a/clang/test/Sema/arm-neon-target.c
+++ b/clang/test/Sema/arm-neon-target.c
@@ -56,9 +56,6 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t 
v8i16, uint8x16_t
   // bf16
   vbfdot_f32(v2f32, v4bf16, v4bf16); // expected-error {{always_inline 
function 'vbfdot_f32' requires target feature 'bf16'}}
   vcreate_bf16(10);
-  vdup_lane_bf16(v4bf16, 2); // expected-error 
{{'__builtin_neon_splat_lane_bf16' needs target feature bf16}}
-  vdup_n_bf16(bf16); // expected-error {{always_inline function 'vdup_n_bf16' 
requires target feature 'bf16'}}
-  vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_bf16' needs target 
feature bf16}}
   vcvt_f32_bf16(v4bf16); // expected-error {{always_inline function 
'vcvt_f32_bf16' requires target feature 'bf16'}}
   vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 
'vcvt_bf16_f32' requires target feature 'bf16'}}
   // v8.1 - qrdmla

>From 6c18be124cfa3039957b013d50dd5bafefa85edf Mon Sep 17 00:00:00 2001
From: Paul Walker <[email protected]>
Date: Wed, 17 Jun 2026 13:54:13 +0000
Subject: [PATCH 2/2] Remove +bf16 requirement from luti builtins.

---
 clang/include/clang/Basic/arm_neon.td         | 19 ++++++++-----------
 clang/test/CodeGen/AArch64/neon-luti.c        |  4 ++--
 .../Sema/aarch64-neon-immediate-ranges/luti.c |  2 +-
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Basic/arm_neon.td 
b/clang/include/clang/Basic/arm_neon.td
index dda6ff59179bb..24689a6d7a0cb 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -2160,17 +2160,14 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = 
"lut" in {
                           [ImmCheck<3, ImmCheck0_1>]>;
   def VLUTI4_H_X2_Q : SInst<"vluti4_laneq_x2", ".2(<U)I", "QsQUsQPsQh",
                           [ImmCheck<3, ImmCheck0_3>]>;
-
-  let TargetGuard = "lut,bf16" in {
-    def VLUTI2_BF      : SInst<"vluti2_lane", "Q.(<qU)I", "bQb",
-                              [ImmCheck<2, ImmCheck0_3>]>;
-    def VLUTI2_BF_Q    : SInst<"vluti2_laneq", "Q.(<QU)I", "bQb",
-                              [ImmCheck<2, ImmCheck0_7>]>;
-    def VLUTI4_BF_X2   : SInst<"vluti4_lane_x2", ".2(<qU)I", "Qb",
-                              [ImmCheck<3, ImmCheck0_1>]>;
-    def VLUTI4_BF_X2_Q   : SInst<"vluti4_laneq_x2", ".2(<U)I", "Qb",
-                              [ImmCheck<3, ImmCheck0_3>]>;
-  }
+  def VLUTI2_BF      : SInst<"vluti2_lane", "Q.(<qU)I", "bQb",
+                            [ImmCheck<2, ImmCheck0_3>]>;
+  def VLUTI2_BF_Q    : SInst<"vluti2_laneq", "Q.(<QU)I", "bQb",
+                            [ImmCheck<2, ImmCheck0_7>]>;
+  def VLUTI4_BF_X2   : SInst<"vluti4_lane_x2", ".2(<qU)I", "Qb",
+                            [ImmCheck<3, ImmCheck0_1>]>;
+  def VLUTI4_BF_X2_Q   : SInst<"vluti4_laneq_x2", ".2(<U)I", "Qb",
+                            [ImmCheck<3, ImmCheck0_3>]>;
 }
 
 let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in {
diff --git a/clang/test/CodeGen/AArch64/neon-luti.c 
b/clang/test/CodeGen/AArch64/neon-luti.c
index 4b485636d45b1..4017bfa315a66 100644
--- a/clang/test/CodeGen/AArch64/neon-luti.c
+++ b/clang/test/CodeGen/AArch64/neon-luti.c
@@ -1,8 +1,8 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
 // REQUIRES: aarch64-registered-target
 #include <arm_neon.h>
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon 
-target-feature +lut -target-feature +bf16 -O3 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon 
-target-feature +lut -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon 
-target-feature +lut -O3 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon 
-target-feature +lut -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_u8(
 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c 
b/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c
index bed8cbc1481dd..9daf3018273de 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon 
-target-feature +lut -target-feature +bf16 -ffreestanding -fsyntax-only -verify 
%s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon 
-target-feature +lut -ffreestanding -fsyntax-only -verify %s
 
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to