fpetrogalli created this revision. fpetrogalli added reviewers: c-rhodes, kmclaughlin, efriedma, sdesmalen, ctetreau. Herald added subscribers: llvm-commits, cfe-commits, psnobl, rkruppe, hiraditya, tschuett. Herald added projects: clang, LLVM.
The following intrinsics has been added: svuint16_t svcnt[_bf16]_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op) svuint16_t svcnt[_bf16]_x(svbool_t pg, svbfloat16_t op) svuint16_t svcnt[_bf16]_z(svbool_t pg, svbfloat16_t op) svbfloat16_t svtbl[_bf16](svbfloat16_t data, svuint16_t indices) svbfloat16_t svtbl2[_bf16](svbfloat16x2_t data, svuint16_t indices) svbfloat16_t svtbx[_bf16](svbfloat16_t fallback, svbfloat16_t data, svuint16_t indices) Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D82429 Files: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx-bfloat.c llvm/lib/Target/AArch64/SVEInstrFormats.td llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll +++ llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll @@ -122,6 +122,16 @@ ret <vscale x 8 x half> %out } +define <vscale x 8 x bfloat> @ftbx_h_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x i16> %c) { +; CHECK-LABEL: ftbx_h_bf16: +; CHECK: tbx z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbx.nxv8bf16(<vscale x 8 x bfloat> %a, + <vscale x 8 x bfloat> %b, + <vscale x 8 x i16> %c) + ret <vscale x 8 x bfloat> %out +} + define <vscale x 4 x i32> @tbx_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { ; CHECK-LABEL: tbx_s: ; CHECK: tbx z0.s, z1.s, z2.s @@ -179,3 +189,5 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.tbx.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i16>) declare <vscale x 4 x float> @llvm.aarch64.sve.tbx.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i32>) declare <vscale x 2 x double> @llvm.aarch64.sve.tbx.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i64>) + +declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbx.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i16>) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll @@ -1009,6 +1009,15 @@ ret <vscale x 8 x half> %out } +define <vscale x 8 x bfloat> @tbl_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: tbl_bf16: +; CHECK: tbl z0.h, { z0.h }, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x bfloat> %out +} + define <vscale x 4 x float> @tbl_f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) { ; CHECK-LABEL: tbl_f32: ; CHECK: tbl z0.s, { z0.s }, z1.s @@ -1859,6 +1868,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) declare <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>) declare <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>) +declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>) declare <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>) declare <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll @@ -145,6 +145,16 @@ ret <vscale x 8 x i16> %out } +define <vscale x 8 x i16> @cnt_bf16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %b) { +; CHECK-LABEL: cnt_bf16: +; CHECK: cnt z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> %a, + <vscale x 8 x i1> %pg, + <vscale x 8 x bfloat> %b) + ret <vscale x 8 x i16> %out +} + define <vscale x 4 x i32> @cnt_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) { ; CHECK-LABEL: cnt_f32: ; CHECK: cnt z0.s, p0/m, z1.s @@ -180,5 +190,6 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>) declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>) declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x half>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x bfloat>) declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>) declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x double>) Index: llvm/lib/Target/AArch64/SVEInstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/SVEInstrFormats.td +++ llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1020,6 +1020,8 @@ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>; def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>; + + def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>; } multiclass sve2_int_perm_tbl<string asm, SDPatternOperator op> { @@ -1053,6 +1055,11 @@ nxv8f16:$Op2, zsub1), nxv8i16:$Op3))>; + def : Pat<(nxv8bf16 (op nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)), + (nxv8bf16 (!cast<Instruction>(NAME # _H) (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0, + nxv8bf16:$Op2, zsub1), + nxv8i16:$Op3))>; + def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4i32:$Op3)), (nxv4f32 (!cast<Instruction>(NAME # _S) (REG_SEQUENCE ZPR2, nxv4f32:$Op1, zsub0, nxv4f32:$Op2, zsub1), @@ -1097,6 +1104,8 @@ def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>; def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>; def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>; + + def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>; } class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty> @@ -3685,9 +3694,10 @@ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; - def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8bf16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; } multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, Index: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx-bfloat.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx-bfloat.c @@ -0,0 +1,23 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svbfloat16_t test_svtbx_bf16(svbfloat16_t fallback, svbfloat16_t data, svuint16_t indices) { + // CHECK-LABEL: @test_svtbx_bf16( + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbx.nxv8bf16(<vscale x 8 x bfloat> %fallback, <vscale x 8 x bfloat> %data, <vscale x 8 x i16> %indices) + // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svtbx'}} + // expected-warning@+1 {{implicit declaration of function 'svtbx_bf16'}} + return SVE_ACLE_FUNC(svtbx, _bf16, , )(fallback, data, indices); +} Index: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c @@ -0,0 +1,25 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svbfloat16_t test_svtbl2_bf16(svbfloat16x2_t data, svuint16_t indices) { + // CHECK-LABEL: test_svtbl2_bf16 + // CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %data, i32 1) + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl2.nxv8bf16(<vscale x 8 x bfloat> %[[V0]], <vscale x 8 x bfloat> %[[V1]], <vscale x 8 x i16> %indices) + // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svtbl2'}} + // expected-warning@+1 {{implicit declaration of function 'svtbl2_bf16'}} + return SVE_ACLE_FUNC(svtbl2, _bf16, , )(data, indices); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c @@ -0,0 +1,26 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t +// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s + +// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it. +// ASM-NOT: warning +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svbfloat16_t test_svtbl_bf16(svbfloat16_t data, svuint16_t indices) { + // CHECK-LABEL: test_svtbl_bf16 + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i16> %indices) + // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svtbl_bf16'}} + return SVE_ACLE_FUNC(svtbl, _bf16, , )(data, indices); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c @@ -0,0 +1,44 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t +// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s + +// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it. +// ASM-NOT: warning +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svuint16_t test_svcnt_bf16_z(svbool_t pg, svbfloat16_t op) { + // CHECK-LABEL: test_svcnt_bf16_z + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op) + // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_z'}} + return SVE_ACLE_FUNC(svcnt, _bf16, _z, )(pg, op); +} + +svuint16_t test_svcnt_bf16_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op) { + // CHECK-LABEL: test_svcnt_bf16_m + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> %inactive, <vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op) + // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_m'}} + return SVE_ACLE_FUNC(svcnt, _bf16, _m, )(inactive, pg, op); +} +svuint16_t test_svcnt_bf16_x(svbool_t pg, svbfloat16_t op) { + // CHECK-LABEL: test_svcnt_bf16_x + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op) + // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_x'}} + return SVE_ACLE_FUNC(svcnt, _bf16, _x, )(pg, op); +} Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -8462,6 +8462,7 @@ case SVE::BI__builtin_sve_svtbl2_u64: case SVE::BI__builtin_sve_svtbl2_s64: case SVE::BI__builtin_sve_svtbl2_f16: + case SVE::BI__builtin_sve_svtbl2_bf16: case SVE::BI__builtin_sve_svtbl2_f32: case SVE::BI__builtin_sve_svtbl2_f64: { SVETypeFlags TF(Builtin->TypeModifier); Index: clang/include/clang/Basic/arm_sve.td =================================================================== --- clang/include/clang/Basic/arm_sve.td +++ clang/include/clang/Basic/arm_sve.td @@ -903,7 +903,9 @@ defm SVCLS : SInstCLS<"svcls", "csil", "aarch64_sve_cls">; defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl", "aarch64_sve_clz">; defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt">; - +let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { + defm SVCNT_BF16 : SInstCLS<"svcnt", "b", "aarch64_sve_cnt">; +} //////////////////////////////////////////////////////////////////////////////// // Conversion @@ -1153,6 +1155,9 @@ def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">; def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">; def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">; +let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { + def SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl">; +} def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1">; def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2">; def SVUNPKHI_S : SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi">; @@ -1920,6 +1925,11 @@ def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx">; } +let ArchGuard = "defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)" in { +def SVTBL2_BF16 : SInst<"svtbl2[_{d}]", "d2u", "b", MergeNone>; +def SVTBX_BF16 : SInst<"svtbx[_{d}]", "dddu", "b", MergeNone, "aarch64_sve_tbx">; +} + //////////////////////////////////////////////////////////////////////////////// // SVE2 - Optional
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits